Repository: dato-code/PowerGraph Branch: master Commit: a038f975cf2a Files: 757 Total size: 6.0 MB Directory structure: gitextract_mlg8j482/ ├── .gitignore ├── BINARY_README ├── CMakeLists.txt ├── Doxyfile ├── Doxyfile_internal ├── README.md ├── TUTORIALS.md ├── apps/ │ ├── CMakeLists.txt │ ├── cascades/ │ │ ├── CMakeLists.txt │ │ └── cascades.cpp │ ├── concomp/ │ │ ├── CMakeLists.txt │ │ └── concomp.cpp │ ├── example/ │ │ ├── CMakeLists.txt │ │ └── hello_world.cpp │ └── label_propagation/ │ ├── CMakeLists.txt │ └── label_propagation.cpp ├── cmake/ │ ├── FindAnt.cmake │ ├── FindBoost.cmake │ ├── FindCUDA/ │ │ ├── make2cmake.cmake │ │ ├── parse_cubin.cmake │ │ └── run_nvcc.cmake │ ├── FindCUDA.cmake │ ├── FindLibJpeg.cmake │ ├── FindMPICH2.cmake │ ├── FindMatlab.cmake │ ├── FindPerftools.cmake │ ├── Mex_stub.cpp │ └── mex_link.sh ├── configure ├── cxxtest/ │ ├── cxxtest/ │ │ ├── Descriptions.cpp │ │ ├── Descriptions.h │ │ ├── DummyDescriptions.cpp │ │ ├── DummyDescriptions.h │ │ ├── ErrorFormatter.h │ │ ├── ErrorPrinter.h │ │ ├── Flags.h │ │ ├── GlobalFixture.cpp │ │ ├── GlobalFixture.h │ │ ├── Gui.h │ │ ├── LinkedList.cpp │ │ ├── LinkedList.h │ │ ├── Mock.h │ │ ├── ParenPrinter.h │ │ ├── QtGui.h │ │ ├── RealDescriptions.cpp │ │ ├── RealDescriptions.h │ │ ├── Root.cpp │ │ ├── SelfTest.h │ │ ├── StdHeaders.h │ │ ├── StdTestSuite.h │ │ ├── StdValueTraits.h │ │ ├── StdioFilePrinter.h │ │ ├── StdioPrinter.h │ │ ├── TeeListener.h │ │ ├── TestListener.h │ │ ├── TestMain.h │ │ ├── TestRunner.h │ │ ├── TestSuite.cpp │ │ ├── TestSuite.h │ │ ├── TestTracker.cpp │ │ ├── TestTracker.h │ │ ├── ValueTraits.cpp │ │ ├── ValueTraits.h │ │ ├── Win32Gui.h │ │ ├── X11Gui.h │ │ ├── XUnitPrinter.h │ │ ├── XmlFormatter.h │ │ ├── XmlPrinter.h │ │ ├── YesNoRunner.h │ │ ├── __init__.py │ │ ├── __release__.py │ │ ├── cxx_parser.py │ │ ├── cxxtest_fog.py │ │ ├── cxxtest_misc.py │ │ ├── cxxtest_parser.py │ │ └── cxxtestgen.py │ ├── cxxtestgen │ └── python/ │ ├── README.txt │ ├── convert.py │ 
├── cxxtest/ │ │ ├── __init__.py │ │ ├── __release__.py │ │ ├── cxx_parser.py │ │ ├── cxxtest_fog.py │ │ ├── cxxtest_misc.py │ │ ├── cxxtest_parser.py │ │ └── cxxtestgen.py │ ├── python3/ │ │ ├── cxxtest/ │ │ │ ├── __init__.py │ │ │ ├── __release__.py │ │ │ ├── cxx_parser.py │ │ │ ├── cxxtest_fog.py │ │ │ ├── cxxtest_misc.py │ │ │ ├── cxxtest_parser.py │ │ │ └── cxxtestgen.py │ │ └── scripts/ │ │ └── cxxtestgen │ ├── scripts/ │ │ └── cxxtestgen │ └── setup.py ├── demoapps/ │ ├── CMakeLists.txt │ ├── dsl/ │ │ ├── CMakeLists.txt │ │ ├── gen_impl.cpp │ │ ├── gl_server.cpp │ │ ├── graph_typedefs.gen │ │ ├── impl.graphlab │ │ └── scala_impl/ │ │ └── dsl.scala │ ├── pagerank/ │ │ ├── CMakeLists.txt │ │ ├── simple_pagerank.cpp │ │ ├── warp_engine_pagerank.cpp │ │ └── warp_parfor_pagerank.cpp │ └── rpc/ │ ├── CMakeLists.txt │ ├── dht_performance_test.cpp │ ├── fiber_future_test.cpp │ ├── obj_fiber_future_test.cpp │ ├── rpc_call_perf_test.cpp │ ├── rpc_example1.cpp │ ├── rpc_example2.cpp │ ├── rpc_example3.cpp │ ├── rpc_example4.cpp │ ├── rpc_example5.cpp │ ├── rpc_example6.cpp │ ├── rpc_example7.cpp │ ├── rpc_example8.cpp │ └── rpc_example9.cpp ├── dist/ │ └── README ├── doc/ │ ├── README │ └── images/ │ ├── cycle_triangle.dot │ ├── example_webgraph.dot │ ├── graph_format_example.dot │ ├── images.pptx │ ├── in_triangle.dot │ ├── out_triangle.dot │ └── through_triangle.dot ├── license/ │ ├── LICENSE.txt │ ├── LICENSE_prepend.txt │ ├── corporate_CLA.txt │ └── individual_CLA.txt ├── matlab/ │ └── eventlog_parser.m ├── patches/ │ ├── boost.patch │ ├── libbz2_fpic.patch │ ├── libevent_clean_and_remap.sh │ ├── libevent_remap_file.txt │ ├── libhdfs.patch │ ├── libjson.patch │ ├── opencv_apple_rpath.patch │ ├── tcmalloc.patch │ └── zookeeper/ │ ├── Makefile │ └── configure ├── scripts/ │ ├── add_line_to_eof.sh │ ├── binary_list.txt │ ├── build_linux_static.sh │ ├── build_linux_static_no_jvm.sh │ ├── build_linux_static_no_jvm_no_mpi.sh │ ├── build_osx_static.sh │ ├── 
compile_static_release.sh │ ├── ec2/ │ │ ├── benchmark_ec2.sh │ │ ├── gl-ec2 │ │ ├── gl_ec2.py │ │ └── readme │ ├── ec2_tools/ │ │ ├── scatter │ │ ├── setup-hadoop │ │ └── setup-torque │ ├── install_graphlab.sh │ ├── license_prepend.sh │ ├── linux_run_script_no_jvm_template.sh │ ├── linux_run_script_template.sh │ ├── make_all_docs.sh │ ├── make_dist.sh │ ├── mpi_redirect_stdout.sh │ ├── mpirsync │ ├── rpcexec.py │ └── test_dist.sh ├── src/ │ ├── CMakeLists.txt │ ├── graphlab/ │ │ ├── CMakeLists.txt │ │ ├── aggregation/ │ │ │ ├── CMakeLists.txt │ │ │ ├── aggregation_includes.hpp │ │ │ └── distributed_aggregator.hpp │ │ ├── docs/ │ │ │ ├── faq.dox │ │ │ ├── overview.dox │ │ │ ├── using.dox │ │ │ └── using_warp.dox │ │ ├── engine/ │ │ │ ├── CMakeLists.txt │ │ │ ├── async_consistent_engine.hpp │ │ │ ├── distributed_chandy_misra.hpp │ │ │ ├── engine_includes.hpp │ │ │ ├── execution_status.hpp │ │ │ ├── iengine.hpp │ │ │ ├── message_array.hpp │ │ │ ├── omni_engine.hpp │ │ │ ├── synchronous_engine.hpp │ │ │ ├── warp_engine.hpp │ │ │ ├── warp_graph_broadcast.hpp │ │ │ ├── warp_graph_mapreduce.hpp │ │ │ ├── warp_graph_transform.hpp │ │ │ └── warp_parfor_all_vertices.hpp │ │ ├── graph/ │ │ │ ├── CMakeLists.txt │ │ │ ├── builtin_parsers.hpp │ │ │ ├── distributed_graph.hpp │ │ │ ├── dynamic_local_graph.hpp │ │ │ ├── graph_basic_types.hpp │ │ │ ├── graph_formats.dox │ │ │ ├── graph_gather_apply.hpp │ │ │ ├── graph_hash.hpp │ │ │ ├── graph_includes.hpp │ │ │ ├── graph_ops.hpp │ │ │ ├── graph_storage_deprecated.hpp │ │ │ ├── graph_vertex_join.hpp │ │ │ ├── ingress/ │ │ │ │ ├── distributed_batch_ingress.hpp │ │ │ │ ├── distributed_constrained_batch_ingress.hpp │ │ │ │ ├── distributed_constrained_oblivious_ingress.hpp │ │ │ │ ├── distributed_constrained_random_ingress.hpp │ │ │ │ ├── distributed_hdrf_ingress.hpp │ │ │ │ ├── distributed_identity_ingress.hpp │ │ │ │ ├── distributed_ingress_base.hpp │ │ │ │ ├── distributed_oblivious_ingress.hpp │ │ │ │ ├── 
distributed_random_ingress.hpp │ │ │ │ ├── ingress_edge_decision.hpp │ │ │ │ └── sharding_constraint.hpp │ │ │ ├── local_edge_buffer.hpp │ │ │ ├── local_graph.hpp │ │ │ ├── local_graph_ops.hpp │ │ │ └── vertex_set.hpp │ │ ├── jni/ │ │ │ ├── CMakeLists.txt │ │ │ ├── java_any.cpp │ │ │ ├── java_any.hpp │ │ │ ├── org_graphlab_Aggregator.cpp │ │ │ ├── org_graphlab_Aggregator.h │ │ │ ├── org_graphlab_Aggregator.hpp │ │ │ ├── org_graphlab_Context.cpp │ │ │ ├── org_graphlab_Context.h │ │ │ ├── org_graphlab_Context.hpp │ │ │ ├── org_graphlab_Core.cpp │ │ │ ├── org_graphlab_Core.h │ │ │ ├── org_graphlab_Core.hpp │ │ │ ├── org_graphlab_Updater.cpp │ │ │ ├── org_graphlab_Updater.h │ │ │ └── org_graphlab_Updater.hpp │ │ ├── logger/ │ │ │ ├── CMakeLists.txt │ │ │ ├── assertions.hpp │ │ │ ├── assertions.hpp.orig │ │ │ ├── backtrace.cpp │ │ │ ├── backtrace.hpp │ │ │ ├── fail_method.hpp │ │ │ ├── logger.cpp │ │ │ ├── logger.hpp │ │ │ └── logger_includes.hpp │ │ ├── macros_def.hpp │ │ ├── macros_undef.hpp │ │ ├── options/ │ │ │ ├── CMakeLists.txt │ │ │ ├── command_line_options.cpp │ │ │ ├── command_line_options.hpp │ │ │ ├── engine_help.txt │ │ │ ├── graph_help.txt │ │ │ ├── graphlab_options.hpp │ │ │ ├── options_includes.hpp │ │ │ ├── options_map.cpp │ │ │ └── options_map.hpp │ │ ├── parallel/ │ │ │ ├── CMakeLists.txt │ │ │ ├── atomic.hpp │ │ │ ├── atomic_add_vector2_empty_specialization.hpp │ │ │ ├── atomic_ops.hpp │ │ │ ├── cache_line_pad.hpp │ │ │ ├── deferred_rwlock.hpp │ │ │ ├── fiber_barrier.hpp │ │ │ ├── fiber_conditional.hpp │ │ │ ├── fiber_control.cpp │ │ │ ├── fiber_control.hpp │ │ │ ├── fiber_group.cpp │ │ │ ├── fiber_group.hpp │ │ │ ├── fiber_remote_request.hpp │ │ │ ├── lockfree_push_back.hpp │ │ │ ├── mutex.hpp │ │ │ ├── parallel_includes.hpp │ │ │ ├── pthread_tools.cpp │ │ │ ├── pthread_tools.hpp │ │ │ ├── queued_rwlock.hpp │ │ │ ├── thread_pool.cpp │ │ │ └── thread_pool.hpp │ │ ├── rpc/ │ │ │ ├── CMakeLists.txt │ │ │ ├── async_consensus.cpp │ │ │ ├── 
async_consensus.hpp │ │ │ ├── buffered_exchange.hpp │ │ │ ├── caching_dht.hpp │ │ │ ├── circular_char_buffer.cpp │ │ │ ├── circular_char_buffer.hpp │ │ │ ├── circular_iovec_buffer.hpp │ │ │ ├── dc.cpp │ │ │ ├── dc.hpp │ │ │ ├── dc_buffered_stream_send2.cpp │ │ │ ├── dc_buffered_stream_send2.hpp │ │ │ ├── dc_comm_base.hpp │ │ │ ├── dc_compile_parameters.hpp │ │ │ ├── dc_dist_object.hpp │ │ │ ├── dc_dist_object_base.hpp │ │ │ ├── dc_init_from_env.cpp │ │ │ ├── dc_init_from_env.hpp │ │ │ ├── dc_init_from_mpi.cpp │ │ │ ├── dc_init_from_mpi.hpp │ │ │ ├── dc_init_from_zookeeper.cpp │ │ │ ├── dc_init_from_zookeeper.hpp │ │ │ ├── dc_internal_types.hpp │ │ │ ├── dc_packet_mask.hpp │ │ │ ├── dc_receive.hpp │ │ │ ├── dc_send.hpp │ │ │ ├── dc_services.hpp │ │ │ ├── dc_stream_receive.cpp │ │ │ ├── dc_stream_receive.hpp │ │ │ ├── dc_tcp_comm.cpp │ │ │ ├── dc_tcp_comm.hpp │ │ │ ├── dc_thread_get_send_buffer.hpp │ │ │ ├── dc_types.hpp │ │ │ ├── delta_dht.cpp │ │ │ ├── delta_dht.hpp │ │ │ ├── dht.hpp │ │ │ ├── distributed_event_log.cpp │ │ │ ├── distributed_event_log.hpp │ │ │ ├── evwrapdef.h │ │ │ ├── evwrapundef.h │ │ │ ├── fiber_async_consensus.cpp │ │ │ ├── fiber_async_consensus.hpp │ │ │ ├── fiber_buffered_exchange.hpp │ │ │ ├── function_arg_types_def.hpp │ │ │ ├── function_arg_types_undef.hpp │ │ │ ├── function_broadcast_issue.hpp │ │ │ ├── function_call_dispatch.hpp │ │ │ ├── function_call_issue.hpp │ │ │ ├── function_ret_type.hpp │ │ │ ├── get_current_process_hash.cpp │ │ │ ├── get_current_process_hash.hpp │ │ │ ├── is_rpc_call.hpp │ │ │ ├── lazy_dht.hpp │ │ │ ├── mem_function_arg_types_def.hpp │ │ │ ├── mem_function_arg_types_undef.hpp │ │ │ ├── object_broadcast_issue.hpp │ │ │ ├── object_call_dispatch.hpp │ │ │ ├── object_call_issue.hpp │ │ │ ├── object_request_dispatch.hpp │ │ │ ├── object_request_issue.hpp │ │ │ ├── pod_template_structs.hpp │ │ │ ├── request_dispatch.hpp │ │ │ ├── request_future.hpp │ │ │ ├── request_issue.hpp │ │ │ ├── request_reply_handler.cpp │ │ │ 
├── request_reply_handler.hpp │ │ │ ├── rpc.dox │ │ │ ├── rpc_includes.hpp │ │ │ ├── sample_sort.hpp │ │ │ ├── thread_local_send_buffer.cpp │ │ │ └── thread_local_send_buffer.hpp │ │ ├── scheduler/ │ │ │ ├── CMakeLists.txt │ │ │ ├── fifo_scheduler.cpp │ │ │ ├── fifo_scheduler.hpp │ │ │ ├── get_message_priority.hpp │ │ │ ├── ischeduler.hpp │ │ │ ├── priority_scheduler.cpp │ │ │ ├── priority_scheduler.hpp │ │ │ ├── queued_fifo_scheduler.cpp │ │ │ ├── queued_fifo_scheduler.hpp │ │ │ ├── scheduler_factory.hpp │ │ │ ├── scheduler_includes.hpp │ │ │ ├── scheduler_list.cpp │ │ │ ├── scheduler_list.hpp │ │ │ ├── sweep_scheduler.cpp │ │ │ └── sweep_scheduler.hpp │ │ ├── serialization/ │ │ │ ├── CMakeLists.txt │ │ │ ├── basic_types.hpp │ │ │ ├── conditional_serialize.hpp │ │ │ ├── has_load.hpp │ │ │ ├── has_save.hpp │ │ │ ├── iarchive.hpp │ │ │ ├── is_pod.hpp │ │ │ ├── iterator.hpp │ │ │ ├── list.hpp │ │ │ ├── map.hpp │ │ │ ├── oarchive.hpp │ │ │ ├── serializable_concept.hpp │ │ │ ├── serializable_pod.hpp │ │ │ ├── serialization.dox │ │ │ ├── serialization_includes.hpp │ │ │ ├── serialize.hpp │ │ │ ├── serialize_to_from_string.hpp │ │ │ ├── set.hpp │ │ │ ├── unordered_map.hpp │ │ │ ├── unordered_set.hpp │ │ │ ├── unsupported_serialize.hpp │ │ │ └── vector.hpp │ │ ├── ui/ │ │ │ ├── CMakeLists.txt │ │ │ ├── graphlab_visualization.js │ │ │ ├── index.html │ │ │ ├── intel_demo/ │ │ │ │ ├── graph_builder.json │ │ │ │ ├── graphlab_visualization.js │ │ │ │ ├── index.html │ │ │ │ └── style.css │ │ │ ├── metrics_server.cpp │ │ │ ├── metrics_server.hpp │ │ │ ├── mongoose/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── LICENSE │ │ │ │ ├── mongoose.cpp │ │ │ │ └── mongoose.h │ │ │ └── style.css │ │ ├── util/ │ │ │ ├── CMakeLists.txt │ │ │ ├── binary_parser.hpp │ │ │ ├── blocking_queue.hpp │ │ │ ├── bloom_filter.hpp │ │ │ ├── branch_hints.hpp │ │ │ ├── cache.hpp │ │ │ ├── chandy_misra.hpp │ │ │ ├── chandy_misra2.hpp │ │ │ ├── chandy_misra_lockfree.hpp │ │ │ ├── char_counting_sink.hpp │ │ │ ├── 
charstream.hpp │ │ │ ├── cuckoo_map.hpp │ │ │ ├── cuckoo_map_pow2.hpp │ │ │ ├── cuckoo_set_pow2.hpp │ │ │ ├── dense_bitset.hpp │ │ │ ├── empty.hpp │ │ │ ├── event_log.cpp │ │ │ ├── event_log.hpp │ │ │ ├── fast_multinomial.hpp │ │ │ ├── fiber_blocking_queue.hpp │ │ │ ├── fs_util.cpp │ │ │ ├── fs_util.hpp │ │ │ ├── generate_pds.hpp │ │ │ ├── generics/ │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── any.cpp │ │ │ │ ├── any.hpp │ │ │ │ ├── blob.hpp │ │ │ │ ├── block_linked_list.hpp │ │ │ │ ├── conditional_addition_wrapper.hpp │ │ │ │ ├── conditional_combiner_wrapper.hpp │ │ │ │ ├── counting_sort.hpp │ │ │ │ ├── csr_storage.hpp │ │ │ │ ├── dynamic_block.hpp │ │ │ │ ├── dynamic_csr_storage.hpp │ │ │ │ ├── float_selector.hpp │ │ │ │ ├── integer_selector.hpp │ │ │ │ ├── remove_member_pointer.hpp │ │ │ │ ├── robust_cast.hpp │ │ │ │ ├── shuffle.hpp │ │ │ │ ├── test_function_or_functor_type.hpp │ │ │ │ └── vector_zip.hpp │ │ │ ├── hash_functions.hpp │ │ │ ├── hashstream.hpp │ │ │ ├── hdfs.cpp │ │ │ ├── hdfs.hpp │ │ │ ├── hopscotch_map.hpp │ │ │ ├── hopscotch_set.hpp │ │ │ ├── hopscotch_table.hpp │ │ │ ├── inplace_lf_queue.cpp │ │ │ ├── inplace_lf_queue.hpp │ │ │ ├── inplace_lf_queue2.hpp │ │ │ ├── integer_mix.hpp │ │ │ ├── integer_selector.hpp │ │ │ ├── lock_free_internal.hpp │ │ │ ├── lock_free_pool.hpp │ │ │ ├── memory_info.cpp │ │ │ ├── memory_info.hpp │ │ │ ├── mpi_tools.cpp │ │ │ ├── mpi_tools.hpp │ │ │ ├── mutable_queue.hpp │ │ │ ├── net_util.cpp │ │ │ ├── net_util.hpp │ │ │ ├── random.cpp │ │ │ ├── random.hpp │ │ │ ├── resizing_array_sink.hpp │ │ │ ├── safe_circular_char_buffer.cpp │ │ │ ├── safe_circular_char_buffer.hpp │ │ │ ├── small_map.hpp │ │ │ ├── small_set.hpp │ │ │ ├── stl_util.hpp │ │ │ ├── synchronized_unordered_map.hpp │ │ │ ├── synchronized_unordered_map2.hpp │ │ │ ├── system_usage.hpp │ │ │ ├── timer.cpp │ │ │ ├── timer.hpp │ │ │ ├── tracepoint.cpp │ │ │ ├── tracepoint.hpp │ │ │ ├── uint128.hpp │ │ │ ├── union_find.hpp │ │ │ ├── util_includes.hpp │ │ │ ├── 
web_util.cpp │ │ │ └── web_util.hpp │ │ ├── version.hpp │ │ ├── vertex_program/ │ │ │ ├── CMakeLists.txt │ │ │ ├── context.hpp │ │ │ ├── icontext.hpp │ │ │ ├── ivertex_program.hpp │ │ │ ├── messages.hpp │ │ │ ├── op_plus_eq_concept.hpp │ │ │ └── vertex_program_includes.hpp │ │ ├── warp.hpp │ │ └── zookeeper/ │ │ ├── key_value.cpp │ │ ├── key_value.hpp │ │ ├── server_list.cpp │ │ ├── server_list.hpp │ │ ├── zookeeper_common.cpp │ │ └── zookeeper_common.hpp │ └── graphlab.hpp ├── tests/ │ ├── CMakeLists.txt │ ├── arbitrary_signal_test.cpp │ ├── async_consistent_test.cpp │ ├── chandy_misra.cxx │ ├── csr_storage_test.cxx │ ├── cuckootest.cpp │ ├── data/ │ │ ├── CMakeLists.txt │ │ ├── test_adj/ │ │ │ └── test.adj │ │ ├── test_snap/ │ │ │ └── test.snap │ │ └── test_tsv/ │ │ └── test.tsv │ ├── dc_consensus_test.cpp │ ├── dc_fiber_consensus_test.cpp │ ├── dc_test_sequentialization.cpp │ ├── dcsc_test.cpp │ ├── dense_bitset_test.cxx │ ├── dht_performance_test.cpp │ ├── distributed_chandy_misra_test.cpp │ ├── distributed_graph_test.cpp │ ├── distributed_ingress_test.cpp │ ├── empty_test.cxx │ ├── engine_terminator_bench.cxx │ ├── fiber_test.cpp │ ├── fibo_fiber_test.cpp │ ├── hdfs_test.cpp │ ├── hopscotch_test.cpp │ ├── local_graph_test.cxx │ ├── lock_free_pushback.cxx │ ├── mini_web_server.cpp │ ├── random_test.cxx │ ├── runtests.sh │ ├── scheduler_test.cxx │ ├── serializetests.cxx │ ├── sfinae_function_test.cpp │ ├── small_map_test.cxx │ ├── small_set_test.cxx │ ├── sort_test.cpp │ ├── synchronous_engine_test.cpp │ ├── test_lock_free_pool.cxx │ ├── test_parsers.cpp │ ├── test_vertex_set.cpp │ ├── thread_tools.cxx │ └── union_find_test.cxx └── toolkits/ ├── CMakeLists.txt ├── README.md ├── clustering/ │ ├── CMakeLists.txt │ ├── clustering.dox │ ├── generate_synthetic.cpp │ ├── graph_laplacian_for_sc.cpp │ ├── kmeans.cpp │ └── spectral_clustering.cpp ├── collaborative_filtering/ │ ├── CMakeLists.txt │ ├── adpredictor.cpp │ ├── als.cpp │ ├── biassgd.cpp │ ├── cdf.hpp │ ├── 
collaborative_filtering.dox │ ├── cosamp.hpp │ ├── doc.cpp │ ├── eigen_serialization.hpp │ ├── eigen_wrapper.hpp │ ├── implicit.hpp │ ├── make_synthetic_als_data.cpp │ ├── math.hpp │ ├── nmf.cpp │ ├── printouts.hpp │ ├── sgd.cpp │ ├── sparse_als.cpp │ ├── stats.hpp │ ├── svd.cpp │ ├── svdpp.cpp │ ├── types.hpp │ ├── wals.cpp │ ├── warp_als_coord.cpp │ └── warp_nmf.cpp ├── computer_vision/ │ ├── CMakeLists.txt │ ├── computer_vision.dox │ ├── eigen_serialization.cpp │ ├── eigen_serialization.hpp │ ├── gcgraph.hpp │ ├── grabcut.cpp │ ├── opencv_serialization.cpp │ ├── opencv_serialization.hpp │ ├── precomp.hpp │ ├── seam_finders_gr.hpp │ ├── stitch_full_main.cpp │ ├── stitch_grlab.hpp │ ├── stitch_main.cpp │ ├── stitch_main.hpp │ ├── stitch_opts.hpp │ ├── stitching.cpp │ ├── stitching_detailed.cpp │ └── utils.hpp ├── extensions/ │ ├── CMakeLists.txt │ ├── MurmurHash3.cpp │ ├── MurmurHash3.h │ ├── example.txt │ ├── extension.cpp │ ├── extension_data.hpp │ ├── extension_gas.hpp │ ├── extension_gas_base_types.hpp │ ├── extension_gas_lambda_wrapper.hpp │ ├── extension_graph.cpp │ ├── extension_graph.hpp │ ├── extension_main.cpp │ ├── extension_main.hpp │ ├── extension_pagerank.cpp │ ├── extensions.hpp │ └── pagerank_extension_driver.cpp ├── graph_algorithms/ │ ├── CMakeLists.txt │ ├── betweeness.cpp │ ├── closeness.cpp │ ├── djikstra.cpp │ ├── graph_analytics.dox │ └── prestige.cpp ├── graph_analytics/ │ ├── CMakeLists.txt │ ├── TSC.cpp │ ├── approximate_diameter.cpp │ ├── connected_component.cpp │ ├── connected_component_stats.cpp │ ├── degree_ordered_coloring.cpp │ ├── directed_triangle_count.cpp │ ├── eigen_vector_normalization.cpp │ ├── format_convert.cpp │ ├── graph_analytics.dox │ ├── graph_laplacian.cpp │ ├── http/ │ │ ├── index.html │ │ ├── make_jsons.m │ │ ├── style.css │ │ ├── top_users.json │ │ └── twitter_triangles.js │ ├── kcore.cpp │ ├── pagerank.cpp │ ├── partitioning.cpp │ ├── saturation_ordered_coloring.cpp │ ├── simple_coloring.cpp │ ├── 
simple_undirected_triangle_count.cpp │ ├── sssp.cpp │ ├── undirected_triangle_count.cpp │ ├── warp_bond_percolation.cpp │ ├── warp_coloring.cpp │ ├── warp_pagerank.cpp │ └── warp_pagerank2.cpp ├── graphical_models/ │ ├── CMakeLists.txt │ ├── ad3_qp.hpp │ ├── dd_grlab.hpp │ ├── dd_main.cpp │ ├── dd_main.hpp │ ├── dd_opts.hpp │ ├── deprecated/ │ │ ├── factors/ │ │ │ ├── CMakeLists.txt │ │ │ ├── binary_factor.cpp │ │ │ ├── binary_factor.hpp │ │ │ ├── discrete_variable.cpp │ │ │ ├── factor_includes.hpp │ │ │ ├── factor_test.cxx │ │ │ ├── unary_factor.cpp │ │ │ └── unary_factor.hpp │ │ ├── gibbs_sampling/ │ │ │ ├── CMakeLists.txt │ │ │ ├── README │ │ │ ├── chromatic_sampler.cpp │ │ │ ├── chromatic_sampler.hpp │ │ │ ├── factorized_model.cpp │ │ │ ├── factorized_model.hpp │ │ │ ├── global_variables.cpp │ │ │ ├── global_variables.hpp │ │ │ ├── image.cpp │ │ │ ├── image.hpp │ │ │ ├── jt_splash_sampler.cpp │ │ │ ├── jt_splash_sampler.hpp │ │ │ ├── junction_tree.cpp │ │ │ ├── junction_tree.hpp │ │ │ ├── make_denoise_alchemy.cpp │ │ │ ├── matlab/ │ │ │ │ ├── Makefile │ │ │ │ ├── asg2ind.m │ │ │ │ ├── compile_gibbs_sampler.m │ │ │ │ ├── gibbs_sampler.m │ │ │ │ ├── gibbs_sampler_impl.cpp │ │ │ │ ├── ind2asg.m │ │ │ │ ├── matwrap.hpp │ │ │ │ ├── table_factor.m │ │ │ │ └── tests/ │ │ │ │ ├── denoise_test.m │ │ │ │ ├── make_grid_model.m │ │ │ │ ├── small_test.m │ │ │ │ └── small_test2.m │ │ │ ├── mrf.cpp │ │ │ ├── mrf.hpp │ │ │ ├── pgibbs_tls.cpp │ │ │ ├── pgibbs_tls.hpp │ │ │ ├── run_statistics.hpp │ │ │ ├── sampler.cpp │ │ │ ├── util.cpp │ │ │ └── util.hpp │ │ ├── kernelbp/ │ │ │ ├── CMakeLists.txt │ │ │ └── old/ │ │ │ ├── BallTreeDensity.cpp │ │ │ ├── CMakeLists.txt │ │ │ ├── cpp/ │ │ │ │ ├── BallTree.h │ │ │ │ ├── BallTreeClass.cc │ │ │ │ ├── BallTreeDensity.h │ │ │ │ ├── BallTreeDensityClass.cc │ │ │ │ ├── NOTICE │ │ │ │ └── kernels.h │ │ │ ├── denoise.cpp │ │ │ ├── fakemex.h │ │ │ ├── image.hpp │ │ │ ├── kde.h │ │ │ ├── prob.hpp │ │ │ └── prodSampleEpsilon.hpp │ │ └── 
loopybp_denoise.cpp │ ├── eigen_serialization.cpp │ ├── eigen_serialization.hpp │ ├── factors/ │ │ ├── CMakeLists.txt │ │ ├── bp_graph_data.h │ │ ├── bp_vertex_program.hpp │ │ ├── dense_table.hpp │ │ ├── discrete_assignment.hpp │ │ ├── discrete_bounds.hpp │ │ ├── discrete_domain.hpp │ │ ├── discrete_variable.hpp │ │ ├── factor_graph.hpp │ │ ├── factor_graphs.dox │ │ ├── fast_discrete_assignment.hpp │ │ ├── sparse_index.hpp │ │ ├── sparse_table.hpp │ │ ├── table_base.hpp │ │ ├── table_factor.hpp │ │ └── tests/ │ │ ├── CMakeLists.txt │ │ ├── denoise/ │ │ │ ├── CMakeLists.txt │ │ │ └── denoise.cpp │ │ ├── test_MAD_relation/ │ │ │ ├── CMakeLists.txt │ │ │ └── test_MAD_relation.cpp │ │ ├── test_bool_var/ │ │ │ ├── CMakeLists.txt │ │ │ ├── test_bool_var.cpp │ │ │ └── test_cat_bool_joint.cpp │ │ ├── test_dense_table/ │ │ │ ├── CMakeLists.txt │ │ │ └── test_dense_table.cpp │ │ └── test_sparse_table/ │ │ ├── CMakeLists.txt │ │ ├── test_neg_relation.cpp │ │ └── test_sparse_table.cpp │ ├── graphical_models.dox │ ├── lbp_structured_prediction.cpp │ ├── mplp_denoise.cpp │ ├── mplp_structured_prediction.cpp │ ├── profile_lbp_synthetic.cpp │ ├── profile_lbp_synthetic2.cpp │ ├── synthetic_image_data.cpp │ └── utils.hpp ├── linear_solvers/ │ ├── CMakeLists.txt │ ├── jacobi.cpp │ └── linear_solvers.dox ├── toolkits.dox └── topic_modeling/ ├── CMakeLists.txt ├── cgs_lda.cpp ├── cgs_lda_mimno_experimental.cpp ├── deprecated/ │ ├── cgs_lda.cpp │ ├── cvb0_lda_common.cpp │ ├── cvb0_lda_common.hpp │ └── fast_cvb0_lda.cpp ├── http/ │ ├── index.html │ ├── lda_visualizer.js │ ├── style.css │ └── wordclouds ├── lda_sequential_cgs.cpp └── topic_modeling.dox ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # use glob syntax syntax: glob *~ debug/* release/* profile/* dist/graphlabapi* doc/doxygen/* 
doc/doxygen_internal/* deps/* configure.deps config.log new_news.txt cxxtest/cxxtest/*.pyc extapis/java_jni/dist extapis/java_jni/bin demoapps/coloring/matlab_tools/*.tsv demoapps/shortest_path/matlab_tools/*.tsv demoapps/pagerank/matlab_tools/*.tsv tags *swp \.mchg\.mchg .ycm_extra_conf* ================================================ FILE: BINARY_README ================================================ Graphlab Binary Release ----------------------- ======= License ======= GraphLab is free software licensed under the Apache 2.0 License. See license/LICENSE.txt for details. ============ Introduction ============ GraphLab PowerGraph is a graph-based, high performance, distributed computation framework written in C++. GraphLab Features: Unified multicore/ distributed API: write once run anywhere Tuned for performance: optimized C++ execution engine leverages extensive multi-threading and asynchronous IO Scalable: Run on large cluster deployments by intelligently placing data and computation HDFS Integration: Access your data directly from HDFS Powerful Machine Learning Toolkits: Tackle challenging machine learning problems with ease ======== Mac OS X ======== The Mac OS X build is restricted and is compiled without HDFS support, nor distributed capabilities. All toolkits therefore run entirely on one machine in shared memory. These binaries have been tested to run on OS X 10.6 or greater. Though they may work on 10.5 as well. ===== Linux ===== The Linux builds are more complex due to varying glibc versions as well as the need to support both OpenMPI and MPICH2. FOUR binary releases are provided Please obtain the package closest matching your needs. In particular, it is important to match the MPI library you have on your cluster: they are not compatible. The binary packages are: - graphlab_openmpi : Supports OpenMPI 1.3 and 1.4. Supports HDFS and thus requires a working JVM (whether or not you use HDFS). - graphlab_mpich2 : Supports MPICH2. 
Supports HDFS and thus requires a working JVM (whether or not you use HDFS). - graphlab_openmpi_no_jvm : Supports OpenMPI 1.3 and 1.4. Does not support HDFS. Does not require a working JVM. Loading files distributed will therefore require either a distributed file system or a common NFS share. - graphlab_mpich2_no_jvm : Supports MPICH2. Does not support HDFS. Does not require a working JVM. Loading files distributed will therefore require either a distributed file system or a common NFS share. If you want to run only in shared memory, this is also the package to use. Each binary is paired with a matching shell script which sets up the execution environment (by forcing the load of provided versions of system binaries). Several environment variables are used to control the process. JAVA_HOME Either JAVA_HOME or JVM_SO_PATH must be set. This must point to the Java home directory. For instance: /usr/lib/jvm/java-6-openjdk This was tested with Oracle's implementation of Java (sun-jdk or open-jdk). JVM_SO_PATH Either JAVA_HOME or JVM_SO_PATH must be set. Optional. The script will expect to find libjvm.so in $JAVA_HOME/jre/lib/amd64/client/libjvm.so or $JAVA_HOME/jre/lib/amd64/server/libjvm.so If libjvm.so is not in either location, the script will fail. In which case, you should set this variable to the directory containing libjvm.so. USE_SYSTEM_LIBS Optional. If set, the system's glibc (and other system dependencies) will be used instead of the provided versions. HDFS Capability is compiled in. If HDFS is to be used, the "hadoop" command must be available. Additional Notes for OpenMPI ---------------------------- Unlike the MPICH2 build, the OpenMPI build is unable to static link everything. For now, we provide only binary releases built against OpenMPI 1.3.2 which should be binary compatible with 1.4. 
============= Build Details ============= Mac Build: Compiled on OS X 10.8 Apple clang version 4.0 (tags/Apple/clang-421.0.57) (based on LLVM 3.1svn) -O3 -march=x86-64 -mtune=generic -mmacosx-version-min=10.5 Linux Build: To support maximal compatibility, this was compiled on an old Linux distribution. Compiled on Ubuntu 9.04, GCC 4.3.3 -O3 -march=x86-64 -mtune=generic OpenMPI: 1.3.2 (ABI Compatible with 1.4) MPICH2: 1.5 ================================================ FILE: CMakeLists.txt ================================================ project(GraphLab) # We require the most recent version of cmake and automatically # install the correct version when using the cmake lists cmake_minimum_required(VERSION 2.8) # Libraries linked via full path no longer produce linker search paths. cmake_policy(SET CMP0003 NEW) # Preprocessor definition values are now escaped automatically. cmake_policy(SET CMP0005 NEW) # Determine where additional GraphLab specific cmake modules are # defined set(CMAKE_MODULE_PATH ${GraphLab_SOURCE_DIR}/cmake) # TODO: what is this used by set(BUILD_SHARED_LIBS ON) include(CheckLibraryExists) include(CheckFunctionExists) include(ExternalProject) include(CheckCXXSourceCompiles) # check we are on a 64 bit machine (else fail) if( CMAKE_SIZEOF_VOID_P EQUAL 4 ) message( SEND_ERROR "GraphLab does not support 32 bit systems. 
Please switch to 64 bit system and try again" ) endif ( CMAKE_SIZEOF_VOID_P EQUAL 4 ) # # Display information about cmake variables # include(CMakePrintSystemInformation) # set include path include_directories( ${GraphLab_SOURCE_DIR}/src ${GraphLab_SOURCE_DIR}/cxxtest ${GraphLab_SOURCE_DIR}/deps/local/include) # set link path link_directories(${GraphLab_SOURCE_DIR}/deps/local/lib) add_definitions(-DUSE_DYNAMIC_LOCAL_GRAPH) if(NO_OPENMP) set(OPENMP_C_FLAGS "") set(OPENMP_LIBRARIES "") add_definitions(-D__NO_OPENMP__) else() set(OPENMP_C_FLAGS "-fopenmp") set(OPENMP_LIBRARIES "gomp") endif() if (APPLE) link_libraries(pthread ${OPENMP_LIBRARIES}) else() link_libraries(pthread rt ${OPENMP_LIBRARIES}) endif() set(CPP11_FLAGS "") if(CPP11) message(STATUS "C++11 Enabled") set(CPP11_FLAGS "-std=c++11 -Wno-enum-compare -Wno-conversion-null") endif() if(VID32) message(STATUS "Using 32bit vertex id types") add_definitions(-DUSE_VID32) endif() # Shared compiler flags used by all builds (debug, profile, release) set(COMPILER_FLAGS "-Wall -g ${CPP11_FLAGS} ${OPENMP_C_FLAGS}" CACHE STRING "common compiler options") # Disable address space randomization for OSX lion and above if (APPLE) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Xlinker -no_pie") endif() set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath,${GraphLab_SOURCE_DIR}/deps/local/lib") # Set subdirectories subdirs(src tests demoapps toolkits) if(EXPERIMENTAL) if (IS_DIRECTORY ${GraphLab_SOURCE_DIR}/experimental) subdirs(experimental) endif() endif() if (IS_DIRECTORY ${GraphLab_SOURCE_DIR}/apps) subdirs(apps) endif() # Set installation rules install(DIRECTORY src/ DESTINATION include FILES_MATCHING PATTERN "*.hpp" PATTERN "*.h" PATTERN ".svn" EXCLUDE ) install(CODE "execute_process(COMMAND ./scripts/install_deps.sh ${CMAKE_INSTALL_PREFIX} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})") # set(EXTERNAL_PROJECT_FLAGS # "-I${GraphLab_SOURCE_DIR}/deps/local/include 
-L${GraphLab_SOURCE_DIR}/deps/local/lib") ### =================================================================== # ## Download News update # if(NOT DISABLE_NEWS) # message(STATUS # "\n" # "===============================================================\n" # "Downloading the news from graphlab.org: ") # file(DOWNLOAD "http://graphlab.org/news.txt" # ${GraphLab_SOURCE_DIR}/new_news.txt # TIMEOUT 3 # STATUS download_status) # # Track usage v2 news url # file(DOWNLOAD "http://c.statcounter.com/7396904/0/a1b29b21/1/" # ${GraphLab_SOURCE_DIR}/temporary_download_counter.tmp # TIMEOUT 3 # STATUS dlstatus) # file(REMOVE ${GraphLab_SOURCE_DIR}/temporary_download_counter.tmp) # file(READ ${GraphLab_SOURCE_DIR}/new_news.txt news_update) # message(STATUS # "Finished.\n" # ${news_update} # "\n" # "===============================================================") # endif() ## ============================================================================ ## ============================================================================ ## ============================================================================ # Build external dependencies # Build eigen ================================================================= # TODO: Move to toolkits. # While eigen is not used in the core GraphLab library we found it # useful to have a matrix library. 
For now we download eigen from # our servers since the eigen servers use SSL which is not supported # by cmake DOWNLOAD_COMMAND hg clone # https://bitbucket.org/eigen/eigen/ ExternalProject_Add(eigen PREFIX ${GraphLab_SOURCE_DIR}/deps/eigen URL http://bitbucket.org/eigen/eigen/get/3.1.2.tar.bz2 URL_MD5 e9c081360dde5e7dcb8eba3c8430fde2 CONFIGURE_COMMAND "" BUILD_COMMAND "" BUILD_IN_SOURCE 1 INSTALL_COMMAND cp -r Eigen unsupported <INSTALL_DIR>/ INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local/include) add_definitions(-DHAS_EIGEN) macro(requires_eigen NAME) add_dependencies(${NAME} eigen) endmacro(requires_eigen) # libbz ================================================================= ExternalProject_Add(libbz2 PREFIX ${GraphLab_SOURCE_DIR}/deps/libbz2 URL http://www.bzip.org/1.0.6/bzip2-1.0.6.tar.gz URL_MD5 00b516f4704d4a7cb50a1d97e6e8e15b INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local CONFIGURE_COMMAND "" PATCH_COMMAND patch -N -p0 -i ${GraphLab_SOURCE_DIR}/patches/libbz2_fpic.patch || true BUILD_IN_SOURCE 1 BUILD_COMMAND make install PREFIX=<INSTALL_DIR> INSTALL_COMMAND "" ) # Build Boost ================================================================= # GraphLab relies heavily on the boost library for parsing program # options, filesystem support, random number generation, and # unordered containers. 
ExternalProject_Add(boost PREFIX ${GraphLab_SOURCE_DIR}/deps/boost URL "http://tcpdiag.dl.sourceforge.net/project/boost/boost/1.53.0/boost_1_53_0.tar.gz" URL_MD5 57a9e2047c0f511c4dfcf00eb5eb2fbb BUILD_IN_SOURCE 1 CONFIGURE_COMMAND ./bootstrap.sh --with-libraries=filesystem --with-libraries=program_options --with-libraries=system --with-libraries=iostreams --with-libraries=date_time --with-libraries=random --with-libraries=context --prefix=<INSTALL_DIR> BUILD_COMMAND C_INCLUDE_PATH=${GraphLab_SOURCE_DIR}/deps/local/include CPLUS_INCLUDE_PATH=${GraphLab_SOURCE_DIR}/deps/local/include LIBRARY_PATH=${GraphLab_SOURCE_DIR}/deps/local/lib ./b2 install link=static variant=release threading=multi runtime-link=static INSTALL_COMMAND "" INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local ) set(BOOST_ROOT ${GraphLab_SOURCE_DIR}/deps/local ) set(BOOST_LIBS_DIR ${GraphLab_SOURCE_DIR}/deps/local/lib) set(Boost_LIBRARIES ${BOOST_LIBS_DIR}/libboost_filesystem.a ${BOOST_LIBS_DIR}/libboost_program_options.a ${BOOST_LIBS_DIR}/libboost_system.a ${BOOST_LIBS_DIR}/libboost_iostreams.a ${BOOST_LIBS_DIR}/libboost_context.a ${BOOST_LIBS_DIR}/libboost_date_time.a) add_dependencies(boost libbz2) message(STATUS "Boost libs: " ${Boost_LIBRARIES}) # add_definitions(-DBOOST_DATE_TIME_POSIX_TIME_STD_CONFIG) # add_definitions(-DBOOST_ALL_DYN_LINK) # set(Boost_SHARED_LIBRARIES "") foreach(blib ${Boost_LIBRARIES}) message(STATUS "Boost libs: " ${blib}) string(REGEX REPLACE "\\.a$" ${CMAKE_SHARED_LIBRARY_SUFFIX} bout ${blib}) message(STATUS "Boost dyn libs: " ${bout}) set(Boost_SHARED_LIBRARIES ${Boost_SHARED_LIBRARIES} ${bout}) endforeach() message(STATUS "Boost Shared libs: " ${Boost_SHARED_LIBRARIES}) if(NOT NO_TCMALLOC) if(APPLE) set (tcmalloc_shared "--enable-shared=yes") else() set (tcmalloc_shared "--enable-shared=no") endif() # TCMalloc =================================================================== # We use tcmalloc for improved memory allocation performance ExternalProject_Add(libtcmalloc PREFIX 
${GraphLab_SOURCE_DIR}/deps/tcmalloc # Some users can't access domain googlecode.com ,This is a spare URL # URL http://sourceforge.jp/projects/sfnet_gperftools.mirror/downloads/gperftools-2.0.tar.gz URL http://gperftools.googlecode.com/files/gperftools-2.0.tar.gz URL_MD5 13f6e8961bc6a26749783137995786b6 PATCH_COMMAND patch -N -p0 -i ${GraphLab_SOURCE_DIR}/patches/tcmalloc.patch || true CONFIGURE_COMMAND /configure --enable-frame-pointers --prefix= ${tcmalloc_shared} INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local) #link_libraries(tcmalloc) set(TCMALLOC-FOUND 1) add_definitions(-DHAS_TCMALLOC) endif() # Lib Jpeg ================================================================= #ExternalProject_Add(libjpeg # PREFIX ${GraphLab_SOURCE_DIR}/deps/libjpeg # URL http://www.ijg.org/files/jpegsrc.v8d.tar.gz # INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local # CONFIGURE_COMMAND /configure --prefix= # BUILD_IN_SOURCE 1) # # v8 ====================================================================== # ImageMagick ================================================================= # # set(imagemagick_configure_cmd # # bash -c "C_INCLUDE_PATH=${GraphLab_SOURCE_DIR}/deps/local/include CPLUS_INCLUDE_PATH=${GraphLab_SOURCE_DIR}/deps/local/include LIBRARY_PATH=${GraphLab_SOURCE_DIR}/deps/local/lib LDFLAGS='-L${GraphLab_SOURCE_DIR}/deps/local/lib' ./configure --prefix=${GraphLab_SOURCE_DIR}/deps/local --without-threads --disable-openmp --disable-opencl") # # ExternalProject_Add(imagemagick # # PREFIX ${GraphLab_SOURCE_DIR}/deps/imagemagick # # URL http://www.imagemagick.org/download/ImageMagick.tar.gz # # # URL_MD5 010b63a2542c4ec4918c8cb431c00356 # # BUILD_IN_SOURCE 1 # # CONFIGURE_COMMAND ${imagemagick_configure_cmd} # # BUILD_COMMAND # # C_INCLUDE_PATH=${GraphLab_SOURCE_DIR}/deps/local/include # # CPLUS_INCLUDE_PATH=${GraphLab_SOURCE_DIR}/deps/local/include # # LIBRARY_PATH=${GraphLab_SOURCE_DIR}/deps/local/lib # # # LDFLAGS=-L${GraphLab_SOURCE_DIR}/deps/local/lib # # make # # INSTALL_DIR 
${GraphLab_SOURCE_DIR}/deps/local) #ExternalProject_Add(imagemagick # PREFIX ${GraphLab_SOURCE_DIR}/deps/imagemagick # URL http://www.imagemagick.org/download/ImageMagick.tar.gz # # URL_MD5 010b63a2542c4ec4918c8cb431c00356 # CONFIGURE_COMMAND # C_INCLUDE_PATH=${GraphLab_SOURCE_DIR}/deps/local/include # CPLUS_INCLUDE_PATH=${GraphLab_SOURCE_DIR}/deps/local/include # LIBRARY_PATH=${GraphLab_SOURCE_DIR}/deps/local/lib # LDFLAGS=-L${GraphLab_SOURCE_DIR}/deps/local/lib # /configure # --prefix= # --without-threads # --disable-openmp # --disable-opencl # BUILD_COMMAND # C_INCLUDE_PATH=${GraphLab_SOURCE_DIR}/deps/local/include # CPLUS_INCLUDE_PATH=${GraphLab_SOURCE_DIR}/deps/local/include # LIBRARY_PATH=${GraphLab_SOURCE_DIR}/deps/local/lib # LDFLAGS=-L${GraphLab_SOURCE_DIR}/deps/local/lib # make # INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local) #include_directories(${GraphLab_SOURCE_DIR}/deps/local/include/ImageMagick) #add_dependencies(imagemagick libjpeg) # macro(requires_imagemagick NAME) # target_link_libraries(${NAME} Magick++ jpeg) # add_dependencies(${NAME} imagemagick libjpeg) # endmacro(requires_imagemagick) # OpenCV ================================================================= ExternalProject_Add(opencv PREFIX ${GraphLab_SOURCE_DIR}/deps/opencv URL http://sourceforge.net/projects/opencvlibrary/files/opencv-unix/2.4.9/opencv-2.4.9.zip/download # URL_MD5 010b63a2542c4ec4918c8cb431c00356 PATCH_COMMAND patch -N -p0 cmake/OpenCVModule.cmake -i ${GraphLab_SOURCE_DIR}/patches/opencv_apple_rpath.patch || true CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= -DBUILD_TESTS=OFF -DBUILD_PERF_TESTS=OFF -DBUILD_PACKAGE=OFF -DBUILD_EXAMPLES=OFF -DOPENCV_BUILD_3RDPARTY_LIBS=ON -DBUILD_SHARED_LIBS=ON -DBUILD_DOCS=OFF -DBUILD_JPEG=ON -DCMAKE_INCLUDE_PATH=${GraphLab_SOURCE_DIR}/deps/local/include -DWITH_CUBLAS=OFF -DWITH_1394=OFF -DWITH_AVFOUNDATION=OFF -DWITH_CUDA=OFF -DWITH_CUFFT=OFF -DWITH_FFMPEG=OFF -DWITH_GSTREAMER=OFF -DWITH_GTK=OFF -DWITH_QUICKTIME=OFF -DWITH_VIDEOINPUT=OFF 
-DWITH_XIMEA=OFF -DWITH_XINE=OFF -DWITH_V4L=OFF -DWITH_UNICAP=OFF -DWITH_QT=OFF -DWITH_JASPER=NO -DWITH_TIFF=NO -DWITH_OPENCL=OFF -DCMAKE_LIBRARY_PATH=${GraphLab_SOURCE_DIR}/deps/local/lib INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local) # add_dependencies(opencv eigen) macro(requires_opencv NAME) get_target_property(tmp ${NAME} COMPILE_FLAGS) if (NOT tmp) set(tmp "-I${CMAKE_SOURCE_DIR}/deps/local/include/opencv") else() set(tmp "${tmp} -I${CMAKE_SOURCE_DIR}/deps/local/include/opencv") endif() set_target_properties(${NAME} PROPERTIES COMPILE_FLAGS "${tmp}") target_link_libraries(${NAME} opencv_core opencv_imgproc opencv_calib3d opencv_video opencv_features2d opencv_ml opencv_highgui opencv_objdetect opencv_contrib opencv_legacy opencv_contrib opencv_stitching) add_dependencies(${NAME} opencv) endmacro(requires_opencv) # zlib ======================================================================= # We rely on the zlib library to read gzip compressed files # (using boost iostreams). # check_library_exists(z zlibVersion "" ZLIB_FOUND) # if (ZLIB_FOUND) # add_definitions(-DHAS_ZLIB) # link_libraries(z) # endif() # libevent ==================================================================== # LibEvent is used in the RPC layer to manage the interaction between the # TCP stack and the event handler threads if (APPLE) ExternalProject_Add(libevent PREFIX ${GraphLab_SOURCE_DIR}/deps/event URL http://iweb.dl.sourceforge.net/project/levent/libevent/libevent-2.0/libevent-2.0.18-stable.tar.gz URL_MD5 aa1ce9bc0dee7b8084f6855765f2c86a CONFIGURE_COMMAND /configure --prefix= --disable-openssl --enable-shared=no INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local ) else() ExternalProject_Add(libevent PREFIX ${GraphLab_SOURCE_DIR}/deps/event URL http://iweb.dl.sourceforge.net/project/levent/libevent/libevent-2.0/libevent-2.0.18-stable.tar.gz URL_MD5 aa1ce9bc0dee7b8084f6855765f2c86a CONFIGURE_COMMAND /configure --prefix= --disable-openssl --enable-shared=no INSTALL_DIR 
${GraphLab_SOURCE_DIR}/deps/local INSTALL_COMMAND prefix=/ make install && ${GraphLab_SOURCE_DIR}/patches/libevent_clean_and_remap.sh /lib ) endif() #link_libraries(event) #link_libraries(event_pthreads) # libjson ==================================================================== # Lib Json is used to support json serialization for long term storage of # graph data. #ExternalProject_Add(libjson # PREFIX ${GraphLab_SOURCE_DIR}/deps/json # URL http://graphlab.org/deps/libjson_7.6.0.zip # URL_MD5 dcb326038bd9b710b8f717580c647833 # BUILD_IN_SOURCE 1 # CONFIGURE_COMMAND "" # PATCH_COMMAND patch -N -p1 -i ${GraphLab_SOURCE_DIR}/patches/libjson.patch || true # BUILD_COMMAND make # INSTALL_COMMAND prefix=/ make install # INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local # ) # zookeeper ================================================================ ExternalProject_Add(zookeeper PREFIX ${GraphLab_SOURCE_DIR}/deps/zookeeper URL http://mirror.metrocast.net/apache/zookeeper/zookeeper-3.5.1-alpha/zookeeper-3.5.1-alpha.tar.gz URL_MD5 d85f9751724d3f20f792803b61c4db24 PATCH_COMMAND ${CMAKE_COMMAND} -E copy_directory ${GraphLab_SOURCE_DIR}/patches/zookeeper/ BUILD_IN_SOURCE 1 CONFIGURE_COMMAND ./configure --prefix= --disable-shared INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local) ## ============================================================= # JNI ========================================================================= # We use JNI for jvm interfacing and for hdfs support. We currently only # detect the presence of java and do not install our own version. 
if(NOT NO_JAVAC)
  find_package(Java)
  find_package(JNI)

  # find_package(JNI) can report paths even when jni.h is absent, so verify
  # the header itself before declaring JNI usable.
  if(EXISTS ${JAVA_INCLUDE_PATH}/jni.h)
    set(JNI_REALLY_FOUND 1 CACHE BOOL "Tests whether the header actually exists")
    message(STATUS "jni.h was found at " ${JAVA_INCLUDE_PATH}/jni.h)
    include_directories(${JNI_INCLUDE_DIRS})
    ## Guess java home location
    if(EXISTS $ENV{JAVA_HOME})
      set(JAVA_HOME $ENV{JAVA_HOME} CACHE STRING "Location of Java Home")
      message(STATUS "Java home set by user: " ${JAVA_HOME})
    else()
      # Derive JAVA_HOME by dropping the trailing /include from the AWT
      # include path.
      string(REGEX REPLACE "\\/include" "" JAVA_HOME_GUESS ${JAVA_AWT_INCLUDE_PATH})
      set(JAVA_HOME ${JAVA_HOME_GUESS} CACHE STRING "Location of Java Home")
      message(STATUS "Java Home guessed: " ${JAVA_HOME})
    endif()
  else()
    set(JNI_REALLY_FOUND 0 CACHE BOOL "Tests whether the header actually exists")
    message(STATUS "jni.h was not found at " ${JAVA_INCLUDE_PATH}/jni.h)
  endif()

  # Ant ======================================================================
  # Ant is used to build the graphlab JVM interface
  find_package(Ant)

  # LibHDFS ==================================================================
  # If JNI is found we install libhdfs which allows programs to read and write
  # to hdfs filesystems
  if(JNI_REALLY_FOUND)
    message(STATUS "Building libhdfs")
    ExternalProject_Add(hadoop
      PREFIX ${GraphLab_SOURCE_DIR}/deps/hadoop
      URL https://archive.apache.org/dist/hadoop/core/hadoop-1.0.1/hadoop-1.0.1.tar.gz
      # URL http://www.gtlib.gatech.edu/pub/apache/hadoop/common/hadoop-1.0.1/hadoop-1.0.1.tar.gz
      URL_MD5 e627d9b688c4de03cba8313bd0bba148
      # NOTE(review): the <SOURCE_DIR>/<INSTALL_DIR> placeholders below had
      # been stripped from the extracted file (bare "/src/..." paths);
      # restored so the commands run inside the unpacked hadoop tree.
      UPDATE_COMMAND chmod +x <SOURCE_DIR>/src/c++/libhdfs/install-sh <SOURCE_DIR>/src/c++/libhdfs/configure
      PATCH_COMMAND patch -N -p1 -i ${GraphLab_SOURCE_DIR}/patches/libhdfs.patch || true
      BUILD_IN_SOURCE 1
      CONFIGURE_COMMAND <SOURCE_DIR>/src/c++/libhdfs/configure JVM_ARCH=tune=generic --prefix=<INSTALL_DIR> --with-java=${JAVA_HOME} --enable-shared=no --enable-static=yes
      INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local
      # libhdfs's install step does not copy its public header; do it here.
      TEST_COMMAND bash -c "mkdir -p <INSTALL_DIR>/include && cp <SOURCE_DIR>/src/c++/libhdfs/hdfs.h <INSTALL_DIR>/include")
    set(HADOOP_FOUND 1 CACHE BOOL "Hadoop was found")
    add_definitions(-DHAS_HADOOP)
    # link_libraries(hdfs ${JAVA_JVM_LIBRARY})
  endif(JNI_REALLY_FOUND)
endif()

# MPI ========================================================================
# MPI is currently used to establish communication between rpc nodes
# sgr: this name is confusing. i like WITH_MPI better. likewise for NO_OPENMP
if(NO_MPI)
  message(STATUS "MPI disabled as a configure option.")
else()
  # find_package(MPICH2)
  find_package(MPI)
  if(MPI_FOUND)
    message(STATUS "MPI Found: " ${MPI_LIBRARY} ${MPI_EXTRA_LIBRARY})
    add_definitions(-DHAS_MPI)
    include_directories(${MPI_INCLUDE_PATH})
  else(MPI_FOUND)
    message(STATUS "MPI Not Found! Distributed Executables will not be compiled")
    # Clear the library variables so requires_core_deps can link them blindly.
    set(MPI_LIBRARY "")
    set(MPI_EXTRA_LIBRARY "")
    # ExternalProject_Add(openmpi
    #   PREFIX ${GraphLab_SOURCE_DIR}/deps/openmpi
    #   URL http://www.open-mpi.org/software/ompi/v1.4/downloads/openmpi-1.4.5.tar.gz
    #   CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR>
    #   INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local)
  endif(MPI_FOUND)
endif()

# ExternalProject_Add(qthreads
#   PREFIX ${GraphLab_SOURCE_DIR}/deps/qthreads
#   URL http://graphlab.org/deps/qthreads.tar.bz2
#   URL_MD5 fd94ae41a06f3756c1042d47cfa671d3
#   INSTALL_DIR ${GraphLab_SOURCE_DIR}/deps/local
#   BUILD_IN_SOURCE 1
#   CONFIGURE_COMMAND ./configure --prefix=<INSTALL_DIR> --enable-oversubscription --with-scheduler=sherwood --enable-lf-febs -enable-condwait-queue
# )

## ===========================================================================
# Core graphlab requirements
# Link a target against every core dependency built above and make it
# order-depend on the external projects.
macro(requires_core_deps NAME)
  target_link_libraries(${NAME} ${Boost_LIBRARIES} z event event_pthreads zookeeper_mt)
  add_dependencies(${NAME} boost libevent zookeeper)
  if(MPI_FOUND)
    target_link_libraries(${NAME} ${MPI_LIBRARY} ${MPI_EXTRA_LIBRARY})
  endif(MPI_FOUND)
  if(HADOOP_FOUND)
    target_link_libraries(${NAME} hdfs ${JAVA_JVM_LIBRARY})
    add_dependencies(${NAME} hadoop)
  endif(HADOOP_FOUND)
  if(NOT NO_TCMALLOC)
    target_link_libraries(${NAME} tcmalloc)
    add_dependencies(${NAME} libtcmalloc)
  endif()
endmacro(requires_core_deps)

## ============================================================================
# System Checks
## ============================================================================

# Test for cpu affinity support.
# NOTE(review): CMAKE_REQUIRED_LIBRARIES was "restored" from ${crlbackup}
# without ever saving the previous value into it, which silently cleared any
# pre-existing setting; save it first so the restore round-trips.
set(crlbackup ${CMAKE_REQUIRED_LIBRARIES})
set(CMAKE_REQUIRED_LIBRARIES "pthread")
check_function_exists(pthread_setaffinity_np HAS_SET_AFFINITY)
set(CMAKE_REQUIRED_LIBRARIES ${crlbackup})

include(CheckCXXCompilerFlag)

## ============================================================================
# check if MARCH is set
if(NOT MARCH)
  # set to native if supported
  check_cxx_compiler_flag(-march=native HAS_MARCH_NATIVE)
  if(HAS_MARCH_NATIVE)
    set(MARCH "native")
  else()
    set(MARCH "x86-64")
  endif()
endif()

# check if MTUNE is set
if(NOT MTUNE)
  # set to native if supported
  check_cxx_compiler_flag(-mtune=native HAS_MTUNE_NATIVE)
  if(HAS_MTUNE_NATIVE)
    set(MTUNE "native")
  else()
    set(MTUNE "x86-64")
  endif()
endif()

## ============================================================================
# Setup compiler

# If profiling mode is enabled then additional profiling flags are set for
# the compiler
if(COMPILE_PROFILING MATCHES 1)
  set(PROFILING_FLAGS "-DUSE_EVENT_LOG -DUSE_TRACEPOINT")
else()
  set(PROFILING_FLAGS "")
endif()

# check for SSE instruction set (needed for CRC32)
# check_cxx_compiler_flag(-msse4.2 HAS_SSE42)
# set(SSE42_FLAG "")
# if (HAS_SSE42)
#   set(SSE42_FLAG "-msse4.2")
# endif()

# disable Wno-unused-local-typedefs if available
check_cxx_compiler_flag(-Wno-unused-local-typedefs HAS_WNO_LOCAL_TYPEDEFS)
if(HAS_WNO_LOCAL_TYPEDEFS)
  set(WNO_LOCAL_TYPEDEFS "-Wno-unused-local-typedefs")
else()
  set(WNO_LOCAL_TYPEDEFS "")
endif()

# Set the debug flags
set(CMAKE_C_FLAGS_DEBUG
  "-O0 -Wno-attributes -march=${MARCH} -Winit-self ${PROFILING_FLAGS} ${COMPILER_FLAGS}"
  CACHE STRING "compiler options" FORCE)
set(CMAKE_CXX_FLAGS_DEBUG
  "-O0 ${WNO_LOCAL_TYPEDEFS} -Wno-attributes -march=${MARCH} -Winit-self ${PROFILING_FLAGS} ${COMPILER_FLAGS}"
  CACHE STRING "compiler options" FORCE)
set(CMAKE_C_FLAGS_RELEASE
  "-O3 -Wno-attributes -march=${MARCH} -mtune=${MTUNE} ${PROFILING_FLAGS} ${COMPILER_FLAGS}"
  CACHE STRING "compiler options" FORCE)
set(CMAKE_CXX_FLAGS_RELEASE
  "-O3 ${WNO_LOCAL_TYPEDEFS} -Wno-attributes -march=${MARCH} -mtune=${MTUNE} ${PROFILING_FLAGS} ${COMPILER_FLAGS}"
  CACHE STRING "compiler options" FORCE)

# Detect the hardware CRC32 builtin under the release flags (-march may
# enable it).
set(CMAKE_REQUIRED_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
check_cxx_source_compiles("int main(int argc, char** argv) { return __builtin_ia32_crc32di(0,0);}" HAS_CRC32)
set(CMAKE_REQUIRED_FLAGS "")
if(HAS_CRC32)
  set(CMAKE_CXX_FLAGS_RELEASE
    "${CMAKE_CXX_FLAGS_RELEASE} -DHAS_BUILTIN_CRC32"
    CACHE STRING "compiler options" FORCE)
endif()

# check for gcc 4.8

# # Mex setup
# set(CMAKE_CXX_FLAGS_MEX
#   "${CMAKE_CXX_FLAGS_RELEASE} -D_GNU_SOURCE -fPIC -fno-omit-frame-pointer -pthread ")
# set(CMAKE_C_FLAGS_MEX
#   "${CMAKE_C_FLAGS_RELEASE} -D_GNU_SOURCE -fexceptions -fPIC -fno-omit-frame-pointer -pthread ")

if(CMAKE_BUILD_TYPE MATCHES "Release")
  message(STATUS "Release build with C++ flags: " ${CMAKE_CXX_FLAGS_RELEASE})
  message(STATUS "Release build with C flags: " ${CMAKE_C_FLAGS_RELEASE})
elseif(CMAKE_BUILD_TYPE MATCHES "Debug")
  message(STATUS "Debug build with C++ flags: " ${CMAKE_CXX_FLAGS_DEBUG})
  message(STATUS "Debug build with C flags: " ${CMAKE_C_FLAGS_DEBUG})
elseif(CMAKE_BUILD_TYPE MATCHES "Mex")
  message(STATUS "Mex CXX" ${CMAKE_CXX_FLAGS_MEX})
  message(STATUS "Mex C" ${CMAKE_C_FLAGS_MEX})
else()
  message(WARNING "Unknown build type: " ${CMAKE_BUILD_TYPE} "!")
endif()

## ============================================================================
## ============================================================================
## ============================================================================
# Setup testing tools

# Make sure testing is enabled
enable_testing()
# Use Python interpreter
find_package(PythonInterp)
set(CXXTESTGEN ${CMAKE_SOURCE_DIR}/cxxtest/cxxtestgen)

# create a macro to define a test: generate the cxxtest runner .cpp from the
# suite headers, build it as a graphlab executable and register it with CTest.
macro(ADD_CXXTEST NAME)
  if(PYTHONINTERP_FOUND)
    add_custom_command(
      OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${NAME}.cpp
      COMMAND
        ${PYTHON_EXECUTABLE} ${CXXTESTGEN}
        --runner=ErrorPrinter
        -o ${CMAKE_CURRENT_BINARY_DIR}/${NAME}.cpp ${ARGV}
      DEPENDS ${ARGV}
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
  endif(PYTHONINTERP_FOUND)
  add_graphlab_executable(${NAME}test ${CMAKE_CURRENT_BINARY_DIR}/${NAME}.cpp)
  # The generated runner #includes the suite headers from the source dir.
  set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${NAME}.cpp
    PROPERTIES COMPILE_FLAGS "-I${CMAKE_CURRENT_SOURCE_DIR}")
  add_test(${NAME} ${NAME}test)
endmacro(ADD_CXXTEST)

# macro(ADD_CPPTEST NAME)
#   add_graphlab_executable(${NAME}test ${NAME})
#   set_source_files_properties(${NAME}
#     PROPERTIES COMPILE_FLAGS "-I${CMAKE_CURRENT_SOURCE_DIR}")
#   add_test(${NAME} ${NAME}test)
# endmacro(ADD_CPPTEST)

## ============================================================================
## ============================================================================
## ============================================================================
# Build Macros

# copy_file ===================================================================
# copy a single file into build environment
macro(copy_file NAME)
  message(STATUS "Copying File: " ${NAME})
  file(INSTALL ${CMAKE_CURRENT_SOURCE_DIR}/${NAME}
    DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
endmacro(copy_file NAME)

# copy_files ==================================================================
# copy all files matching a pattern into the build environment
macro(copy_files NAME)
  message(STATUS "Copying Files: " ${NAME})
  file(INSTALL ${CMAKE_CURRENT_SOURCE_DIR}/
    DESTINATION ${CMAKE_CURRENT_BINARY_DIR}
    FILES_MATCHING PATTERN ${NAME})
endmacro(copy_files NAME)

# macro(EXEC file prog)
#   add_custom_command(
#     OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${file}
#     COMMAND ${prog}
#     DEPENDS ${prog}
#     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
# endmacro(EXEC)

# add_graphlab_executable =====================================================
# build an executable from the given sources, linked against the graphlab
# library and all core dependencies.
macro(add_graphlab_executable NAME)
  add_executable(${NAME} ${ARGN})
  target_link_libraries(${NAME} graphlab)
  add_dependencies(${NAME} graphlab)
  requires_core_deps(${NAME})
endmacro(add_graphlab_executable)

# Like add_graphlab_executable, but prepends extension_main.cpp and links the
# graphlab_extension library as well.
macro(add_extension_executable NAME)
  add_executable(${NAME} extension_main.cpp ${ARGN})
  # if(!APPLE)
  #   set_target_properties(${NAME} PROPERTIES LINK_FLAGS -Wl,-wrap,main)
  # endif()
  target_link_libraries(${NAME} graphlab graphlab_extension)
  add_dependencies(${NAME} graphlab graphlab_extension)
  requires_core_deps(${NAME})
endmacro(add_extension_executable)

# Convenience target that forces all external dependency projects to build.
add_custom_target(external_dependencies)
requires_core_deps(external_dependencies)

# add_jni_library =============================================================
# If jni was found then create a jni library. Otherwise generate a warning
macro(add_jni_library NAME)
  # Only build if JNI was found
  if(JNI_REALLY_FOUND)
    include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH}/linux)
    message(STATUS "Detected JNI library " ${NAME})
    add_library(${NAME} SHARED ${ARGN})
    # A JNI library is a shared object, so link the PIC build of graphlab and
    # the shared boost libraries.
    target_link_libraries(${NAME} graphlab_pic)
    target_link_libraries(${NAME} ${Boost_SHARED_LIBRARIES})
    # IF (APPLE)
    #   SET(CMAKE_SHARED_MODULE_CREATE_CXX_FLAGS "-dynamiclib -m64 ")
    #   SET_TARGET_PROPERTIES(${NAME} PROPERTIES SUFFIX .jnilib)
    # ENDIF (APPLE)
  else()
    # NOTE(review): this branch previously used "elseif ()" -- an empty
    # condition always evaluates false in CMake, so the diagnostic below was
    # never printed; fixed to else().
    message(STATUS "Not building " ${NAME} " because JNI was not found")
  endif()
endmacro(add_jni_library)



================================================
FILE: Doxyfile
================================================
# Doxyfile 1.5.8

# This file describes the settings to be used by the documentation system
# doxygen (www.doxygen.org) for a project
#
# All text after a hash (#) is considered a comment and will be ignored
# The format is:
#       TAG = value [value, ...]
# For lists items can also be appended using:
#       TAG += value [value, ...]
# Values that contain spaces should be placed between quotes (" ") #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # http://www.gnu.org/software/libiconv for the list of possible encodings. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded # by quotes) that should identify the project. PROJECT_NAME = "GraphLab: Distributed Graph-Parallel API" # The PROJECT_NUMBER tag can be used to enter a project or revision number. # This could be handy for archiving the generated documentation or # if some version control system is used. PROJECT_NUMBER = 2.2 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. OUTPUT_DIRECTORY = doc/doxygen # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. 
Doxygen will use this # information to generate all constant output in the proper language. # The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek, # Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish, # Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, Slovene, # Spanish, Swedish, and Ukrainian. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to JavaDoc). # Set to NO to disable this. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. # If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. 
ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = YES # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = YES # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is # only done if one of the specified strings matches the left-hand part of # the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. STRIP_FROM_PATH = src/ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. STRIP_FROM_INC_PATH = src/ # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful is your file systems # doesn't support long names like on DOS, Mac, or CD-ROM. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a JavaDoc-style # comment as the brief description. If set to NO, the JavaDoc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.) 
JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then Doxygen will # interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. Set this tag to YES if you prefer the old behaviour instead. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 2 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. ALIASES = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. 
The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for # Java. For instance, namespaces will be presented as packages, qualified # scopes will look different, etc. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources only. Doxygen will then generate output that is more tailored for # Fortran. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for # VHDL. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it parses. # With this tag you can assign which parser to use for a given extension. # Doxygen has a built-in mapping, but you can override or extend it using this tag. # The format is ext=language, where ext is a file extension, and language is one of # the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP, # Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat # .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran), # use: inc=Fortran f=C EXTENSION_MAPPING = # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = YES # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. 
CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate getter # and setter methods for a property. Setting this option to YES (the default) # will make doxygen to replace the get and set methods by a property in the # documentation. This will only work if the methods are indeed getting or # setting a simple type. If this is not the case, or you want to show the # methods anyway, you should set this option to NO. IDL_PROPERTY_SUPPORT = NO # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. SUBGROUPING = YES # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically # be useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. 
TYPEDEF_HIDES_STRUCT = NO # The SYMBOL_CACHE_SIZE determines the size of the internal cache use to # determine which symbols to keep in memory and which to flush to disk. # When the cache is full, less often used symbols will be written to disk. # For small to medium size projects (<1000 input files) the default value is # probably good enough. For larger projects a too small cache size can cause # doxygen to be busy swapping symbols to and from disk most of the time # causing a significant performance penality. # If the system has enough physical memory increasing the cache will improve the # performance by keeping more symbols in memory. Note that the value works on # a logarithmic scale so increasing the size by one will rougly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols SYMBOL_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. # Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = NO # This flag is only useful for Objective-C code. 
When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base # name of the file that contains the anonymous namespace. By default # anonymous namespace are hidden. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. # If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_CLASSES = YES # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = YES # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = YES # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. 
INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. If set to YES the scope will be hidden. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the # hierarchy of group names into alphabetical order. If set to NO (the default) # the group names will appear in their defined order. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. 
# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = NO # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = NO # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. GENERATE_BUGLIST = NO # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= NO # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if sectionname ... \endif. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or define consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and defines in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. 
SHOW_USED_FILES = YES # If the sources in your project are distributed over multiple directories # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy # in the documentation. The default is NO. SHOW_DIRECTORIES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). The default is YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. # This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). The default is YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command , where is the value of # the FILE_VERSION_FILTER tag, and is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by # doxygen. The layout file controls the global structure of the generated output files # in an output format independent way. To create the layout file that represents # doxygen's defaults, run doxygen with the -l option. You can optionally specify a # file name after the option, if omitted DoxygenLayout.xml will be used as the name # of the layout file. LAYOUT_FILE = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. 
If left blank NO is used. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. WARNINGS = YES # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members. If EXTRACT_ALL is set to YES then this flag will # automatically be disabled. WARN_IF_UNDOCUMENTED = YES # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. If left blank the output is written # to stderr. WARN_LOGFILE = doxygen.log #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. 
You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = src \ demoapps \ toolkits # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding. Doxygen uses libiconv (or the iconv built # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for # the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx # *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 FILE_PATTERNS = *.hpp *.cpp *.dox # The RECURSIVE tag can be used to specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should # be excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. EXCLUDE = src/graphlab/matlab src/graphlab/gpu # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix filesystem feature) are excluded # from the input. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. 
Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = */src/graphlab/rpc/*issue.hpp */src/graphlab/rpc/*dispatch.hpp */toolkits/*cpp */toolkits/*hpp # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = graphlab::archive_detail::* graphlab::dc_impl::* # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain images that are included in the documentation (see # the \image command). IMAGE_PATH = doc/images # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command , where # is the value of the INPUT_FILTER tag, and is the name of an # input file. Doxygen will then use the output that the filter program writes # to standard output. 
# If FILTER_PATTERNS is specified, this tag will be # ignored. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. # Doxygen will compare the file name with each pattern and apply the # filter if there is a match. # The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER # is applied to all files. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C and C++ comments will always remain visible. STRIP_CODE_COMMENTS = NO # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented # functions referencing it will be listed. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. 
REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. # Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen # built-in source browser. The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. VERBATIM_HEADERS = NO #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = YES # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. # The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. 
IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. It can be used to # fine-tune the look of the HTML output. If the tag is left blank doxygen # will generate a default style sheet. Note that doxygen will try to copy # the style sheet file to the HTML output directory, so don't put your own # stylesheet in the HTML output directory as well, or it will be erased! HTML_STYLESHEET = # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, # files or namespaces will be aligned in HTML using tables. If set to # NO a bullet list will be used. HTML_ALIGN_MEMBERS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. 
For this to work a browser that supports # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). HTML_DYNAMIC_SECTIONS = YES # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). # To create a documentation set, doxygen will generate a Makefile in the # HTML output directory. Running make will produce the docset in that # directory and running "make install" will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find # it at startup. # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information. GENERATE_DOCSET = NO # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the # feed. A documentation feed provides an umbrella under which multiple # documentation sets from a single provider (such as a company or product suite) # can be grouped. DOCSET_FEEDNAME = "Doxygen generated docs" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. DOCSET_BUNDLE_ID = org.doxygen.Project # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. You # can add a path in front of the file if the result should not be # written to the html output directory. 
CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO). GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING # is used to encode HtmlHelp index (hhk), content (hhc) and project file # content. CHM_INDEX_ENCODING = # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER # are set, an additional index file will be generated that can be used as input for # Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated # HTML documentation. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can # be used to specify the file name of the resulting .qch file. # The path specified is relative to the HTML output folder. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#namespace QHP_NAMESPACE = # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating # Qt Help Project output. 
For more information please see # http://doc.trolltech.com/qthelpproject.html#virtual-folders QHP_VIRTUAL_FOLDER = doc # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add. # For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see # Qt Help Project / Custom Filters. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's # filter section matches. # Qt Help Project / Filter Attributes. QHP_SECT_FILTER_ATTRS = # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can # be used to specify the location of Qt's qhelpgenerator. # If non-empty doxygen will try to run qhelpgenerator on the generated # .qhp file. QHG_LOCATION = # The DISABLE_INDEX tag can be used to turn on/off the condensed index at # top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. DISABLE_INDEX = NO # This tag can be used to set the number of enum values (range [1..20]) # that doxygen will group on one line in the generated HTML documentation. ENUM_VALUES_PER_LINE = 4 # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. # If the tag value is set to FRAME, a side panel will be generated # containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, # Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are # probably better off using the HTML help feature. 
Other possible values # for this tag are: HIERARCHIES, which will generate the Groups, Directories, # and Class Hierarchy pages using a tree view instead of an ordered list; # ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which # disables this behavior completely. For backwards compatibility with previous # releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE # respectively. GENERATE_TREEVIEW = YES # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 # Use this tag to change the font size of Latex formulas included # as images in the HTML documentation. The default is 10. Note that # when you change the font size after a successful doxygen run you need # to manually remove any form_*.png images from the HTML output directory # to force them to be regenerated. FORMULA_FONTSIZE = 10 #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = NO # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # LaTeX documents. 
This may be useful for small projects and may help to # save some trees in general. COMPACT_LATEX = NO # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, a4wide, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4wide # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX # packages that should be included in the LaTeX output. EXTRA_PACKAGES = # The LATEX_HEADER tag can be used to specify a personal LaTeX header for # the generated latex document. The header should contain everything until # the first chapter. If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! LATEX_HEADER = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = YES # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. # command to the generated LaTeX files. This will instruct LaTeX to keep # running if errors occur, instead of asking the user for help. # This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output. 
LATEX_HIDE_INDICES = NO #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields. # Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load stylesheet definitions from file. Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. # Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. 
# If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. XML_OUTPUT = xml # The XML_SCHEMA tag can be used to specify an XML schema, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_SCHEMA = # The XML_DTD tag can be used to specify an XML DTD, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_DTD = # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. 
XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an AutoGen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and LaTeX code to be able # to generate PDF and DVI output from the Perl module output. PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. # This is useful # if you want to understand what is going on. # On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. 
PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = YES # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files # in the INCLUDE_PATH (see below) will be search if a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor. INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. 
PREDEFINED = DOXYGEN_DOCUMENTATION GRAPHLAB_SERIALIZE_HPP # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition. EXPAND_AS_DEFINED = RPC_DEFAULT_NUMHANDLERTHREADS RPC_DEFAULT_COMMTYPE # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all function-like macros that are alone # on a line, have an all uppercase name, and do not end with a semicolon. Such # function macros are typically used for boiler-plate code, and will confuse # the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. # Optionally an initial location of the external documentation # can be added for each tagfile. The format of a tag file without # this location is as follows: # # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # # TAGFILES = file1=loc1 "file2 = loc2" ... # where "loc1" and "loc2" can be relative or absolute paths or # URLs. If a location is present for each tag, the installdox tool # does not have to be run to correct the links. # Note that each tag file must have a unique name # (where the name does NOT include the path) # If a tag file is not located in the directory in which doxygen # is run, you must also specify the path to the tagfile here. TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. 
GENERATE_TAGFILE = # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option is superseded by the HAVE_DOT option below. This is only a # fallback. It is recommended to install and use dot, since it yields more # powerful graphs. CLASS_DIAGRAMS = NO # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the # documentation. The MSCGEN_PATH tag allows you to specify the directory where # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. 
The other options in this section # have no effect if this option is set to NO (the default) HAVE_DOT = NO # By default doxygen will write a font called FreeSans.ttf to the output # directory and reference it in all dot files that doxygen generates. This # font does not include all possible unicode characters however, so when you need # these (or just want a differently looking font) you can specify the font name # using DOT_FONTNAME. You need to make sure dot is able to find the font, # which can be done by putting it in a standard location or by setting the # DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory # containing the font. DOT_FONTNAME = FreeSans # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. # The default size is 10pt. DOT_FONTSIZE = 10 # By default doxygen will tell dot to use the output directory to look for the # FreeSans.ttf font (which doxygen will put there itself). If you specify a # different font using DOT_FONTNAME you can set the path where dot # can find it using this tag. DOT_FONTPATH = # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force # the CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = NO # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. COLLABORATION_GRAPH = NO # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = NO # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. 
UML_LOOK = NO # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files. INCLUDE_GRAPH = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = NO # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. CALL_GRAPH = NO # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will show a graphical hierarchy of all classes instead of a textual one. GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. 
DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are png, jpg, or gif # If left blank png will be used. DOT_IMAGE_FORMAT = png # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. Note that doxygen if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lay further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is disabled by default, because dot on Windows does not # seem to support this out of the box. Warning: Depending on the platform used, # enabling this option may lead to badly anti-aliased labels on the edges of # a graph (i.e. 
they become hard to read). DOT_TRANSPARENT = NO # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default. DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. DOT_CLEANUP = YES #--------------------------------------------------------------------------- # Options related to the search engine #--------------------------------------------------------------------------- # The SEARCHENGINE tag specifies whether or not a search engine should be # used. If set to NO the values of all tags below this one will be ignored. SEARCHENGINE = YES ================================================ FILE: Doxyfile_internal ================================================ # Doxyfile 1.5.8 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project # # All text after a hash (#) is considered a comment and will be ignored # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" ") #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. 
Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # http://www.gnu.org/software/libiconv for the list of possible encodings. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded # by quotes) that should identify the project. PROJECT_NAME = "GraphLab: Distributed Graph-Parallel API" # The PROJECT_NUMBER tag can be used to enter a project or revision number. # This could be handy for archiving the generated documentation or # if some version control system is used. PROJECT_NUMBER = 2.2 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. OUTPUT_DIRECTORY = doc/doxygen_internal # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. 
# The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek, # Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish, # Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, Slovene, # Spanish, Swedish, and Ukrainian. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to JavaDoc). # Set to NO to disable this. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. # If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. 
ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = YES # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before file names in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = YES # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is # only done if one of the specified strings matches the left-hand part of # the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. STRIP_FROM_PATH = src/ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. STRIP_FROM_INC_PATH = src/ # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful if your file system # doesn't support long names like on DOS, Mac, or CD-ROM. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a JavaDoc-style # comment as the brief description. If set to NO, the JavaDoc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.) 
JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then Doxygen will # interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. Set this tag to YES if you prefer the old behaviour instead. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 2 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. ALIASES = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. 
The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for # Java. For instance, namespaces will be presented as packages, qualified # scopes will look different, etc. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources only. Doxygen will then generate output that is more tailored for # Fortran. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for # VHDL. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it parses. # With this tag you can assign which parser to use for a given extension. # Doxygen has a built-in mapping, but you can override or extend it using this tag. # The format is ext=language, where ext is a file extension, and language is one of # the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP, # Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat # .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran), # use: inc=Fortran f=C EXTENSION_MAPPING = # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = YES # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. 
CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate getter # and setter methods for a property. Setting this option to YES (the default) # will make doxygen to replace the get and set methods by a property in the # documentation. This will only work if the methods are indeed getting or # setting a simple type. If this is not the case, or you want to show the # methods anyway, you should set this option to NO. IDL_PROPERTY_SUPPORT = NO # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. SUBGROUPING = YES # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically # be useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. 
TYPEDEF_HIDES_STRUCT = NO # The SYMBOL_CACHE_SIZE determines the size of the internal cache used to # determine which symbols to keep in memory and which to flush to disk. # When the cache is full, less often used symbols will be written to disk. # For small to medium size projects (<1000 input files) the default value is # probably good enough. For larger projects a too small cache size can cause # doxygen to be busy swapping symbols to and from disk most of the time # causing a significant performance penalty. # If the system has enough physical memory increasing the cache will improve the # performance by keeping more symbols in memory. Note that the value works on # a logarithmic scale so increasing the size by one will roughly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols SYMBOL_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. # Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = NO # This flag is only useful for Objective-C code. 
When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base # name of the file that contains the anonymous namespace. By default # anonymous namespace are hidden. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. # If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_CLASSES = YES # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = YES # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = YES # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. 
INTERNAL_DOCS = YES # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. If set to YES the scope will be hidden. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the # hierarchy of group names into alphabetical order. If set to NO (the default) # the group names will appear in their defined order. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. 
# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = NO # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = NO # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. GENERATE_BUGLIST = NO # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= NO # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if sectionname ... \endif. ENABLED_SECTIONS = GRAPHLAB_INTERNAL # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or define consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and defines in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. 
SHOW_USED_FILES = YES # If the sources in your project are distributed over multiple directories # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy # in the documentation. The default is NO. SHOW_DIRECTORIES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). The default is YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. # This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). The default is YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command <command> <input-file>, where <command> is the value of # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by # doxygen. The layout file controls the global structure of the generated output files # in an output format independent way. To create the layout file that represents # doxygen's defaults, run doxygen with the -l option. You can optionally specify a # file name after the option, if omitted DoxygenLayout.xml will be used as the name # of the layout file. LAYOUT_FILE = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. 
If left blank NO is used. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. WARNINGS = YES # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members. If EXTRACT_ALL is set to YES then this flag will # automatically be disabled. WARN_IF_UNDOCUMENTED = YES # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. If left blank the output is written # to stderr. WARN_LOGFILE = doxygen.log #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. 
You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = src \ demoapps \ toolkits # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding. Doxygen uses libiconv (or the iconv built # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for # the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx # *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 FILE_PATTERNS = *.hpp *.cpp *.dox # The RECURSIVE tag can be used to specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. EXCLUDE = src/graphlab/matlab src/graphlab/gpu # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix filesystem feature) are excluded # from the input. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. 
# Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = */src/graphlab/rpc/*issue.hpp */src/graphlab/rpc/*dispatch.hpp */toolkits/*cpp */toolkits/*hpp # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = graphlab::archive_detail::* graphlab::dc_impl::* # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain images that are included in the documentation (see # the \image command). IMAGE_PATH = doc/images # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command <filter> <input-file>, where <filter> # is the value of the INPUT_FILTER tag, and <input-file> is the name of an # input file. Doxygen will then use the output that the filter program writes # to standard output. 
# If FILTER_PATTERNS is specified, this tag will be # ignored. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. # Doxygen will compare the file name with each pattern and apply the # filter if there is a match. # The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER # is applied to all files. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C and C++ comments will always remain visible. STRIP_CODE_COMMENTS = NO # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented # functions referencing it will be listed. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. 
REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. # Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen # built-in source browser. The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. VERBATIM_HEADERS = NO #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = YES # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. # The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. 
IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. It can be used to # fine-tune the look of the HTML output. If the tag is left blank doxygen # will generate a default style sheet. Note that doxygen will try to copy # the style sheet file to the HTML output directory, so don't put your own # stylesheet in the HTML output directory as well, or it will be erased! HTML_STYLESHEET = # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, # files or namespaces will be aligned in HTML using tables. If set to # NO a bullet list will be used. HTML_ALIGN_MEMBERS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. 
For this to work a browser that supports # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). HTML_DYNAMIC_SECTIONS = YES # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). # To create a documentation set, doxygen will generate a Makefile in the # HTML output directory. Running make will produce the docset in that # directory and running "make install" will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find # it at startup. # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information. GENERATE_DOCSET = NO # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the # feed. A documentation feed provides an umbrella under which multiple # documentation sets from a single provider (such as a company or product suite) # can be grouped. DOCSET_FEEDNAME = "Doxygen generated docs" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. DOCSET_BUNDLE_ID = org.doxygen.Project # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. You # can add a path in front of the file if the result should not be # written to the html output directory. 
CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO). GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING # is used to encode HtmlHelp index (hhk), content (hhc) and project file # content. CHM_INDEX_ENCODING = # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER # are set, an additional index file will be generated that can be used as input for # Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated # HTML documentation. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can # be used to specify the file name of the resulting .qch file. # The path specified is relative to the HTML output folder. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#namespace QHP_NAMESPACE = # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating # Qt Help Project output. 
For more information please see # http://doc.trolltech.com/qthelpproject.html#virtual-folders QHP_VIRTUAL_FOLDER = doc # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add. # For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see # Qt Help Project / Custom Filters. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's # filter section matches. # Qt Help Project / Filter Attributes. QHP_SECT_FILTER_ATTRS = # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can # be used to specify the location of Qt's qhelpgenerator. # If non-empty doxygen will try to run qhelpgenerator on the generated # .qhp file. QHG_LOCATION = # The DISABLE_INDEX tag can be used to turn on/off the condensed index at # top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. DISABLE_INDEX = NO # This tag can be used to set the number of enum values (range [1..20]) # that doxygen will group on one line in the generated HTML documentation. ENUM_VALUES_PER_LINE = 4 # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. # If the tag value is set to FRAME, a side panel will be generated # containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, # Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are # probably better off using the HTML help feature. 
Other possible values # for this tag are: HIERARCHIES, which will generate the Groups, Directories, # and Class Hierarchy pages using a tree view instead of an ordered list; # ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which # disables this behavior completely. For backwards compatibility with previous # releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE # respectively. GENERATE_TREEVIEW = YES # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 # Use this tag to change the font size of Latex formulas included # as images in the HTML documentation. The default is 10. Note that # when you change the font size after a successful doxygen run you need # to manually remove any form_*.png images from the HTML output directory # to force them to be regenerated. FORMULA_FONTSIZE = 10 #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = NO # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # LaTeX documents. 
This may be useful for small projects and may help to # save some trees in general. COMPACT_LATEX = NO # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, a4wide, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4wide # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX # packages that should be included in the LaTeX output. EXTRA_PACKAGES = # The LATEX_HEADER tag can be used to specify a personal LaTeX header for # the generated latex document. The header should contain everything until # the first chapter. If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! LATEX_HEADER = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = YES # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. # command to the generated LaTeX files. This will instruct LaTeX to keep # running if errors occur, instead of asking the user for help. # This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output. 
LATEX_HIDE_INDICES = NO #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields. # Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load stylesheet definitions from file. Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. # Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. 
# If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. XML_OUTPUT = xml # The XML_SCHEMA tag can be used to specify an XML schema, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_SCHEMA = # The XML_DTD tag can be used to specify an XML DTD, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_DTD = # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. 
XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an AutoGen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and LaTeX code to be able # to generate PDF and DVI output from the Perl module output. PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. # This is useful # if you want to understand what is going on. # On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. 
PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = YES # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files # in the INCLUDE_PATH (see below) will be search if a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor. INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. 
PREDEFINED = DOXYGEN_DOCUMENTATION GRAPHLAB_SERIALIZE_HPP # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition. EXPAND_AS_DEFINED = RPC_DEFAULT_NUMHANDLERTHREADS RPC_DEFAULT_COMMTYPE # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all function-like macros that are alone # on a line, have an all uppercase name, and do not end with a semicolon. Such # function macros are typically used for boiler-plate code, and will confuse # the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. # Optionally an initial location of the external documentation # can be added for each tagfile. The format of a tag file without # this location is as follows: # # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # # TAGFILES = file1=loc1 "file2 = loc2" ... # where "loc1" and "loc2" can be relative or absolute paths or # URLs. If a location is present for each tag, the installdox tool # does not have to be run to correct the links. # Note that each tag file must have a unique name # (where the name does NOT include the path) # If a tag file is not located in the directory in which doxygen # is run, you must also specify the path to the tagfile here. TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. 
GENERATE_TAGFILE = # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option is superseded by the HAVE_DOT option below. This is only a # fallback. It is recommended to install and use dot, since it yields more # powerful graphs. CLASS_DIAGRAMS = NO # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the # documentation. The MSCGEN_PATH tag allows you to specify the directory where # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. 
The other options in this section # have no effect if this option is set to NO (the default) HAVE_DOT = NO # By default doxygen will write a font called FreeSans.ttf to the output # directory and reference it in all dot files that doxygen generates. This # font does not include all possible unicode characters however, so when you need # these (or just want a differently looking font) you can specify the font name # using DOT_FONTNAME. You need need to make sure dot is able to find the font, # which can be done by putting it in a standard location or by setting the # DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory # containing the font. DOT_FONTNAME = FreeSans # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. # The default size is 10pt. DOT_FONTSIZE = 10 # By default doxygen will tell dot to use the output directory to look for the # FreeSans.ttf font (which doxygen will put there itself). If you specify a # different font using DOT_FONTNAME you can set the path where dot # can find it using this tag. DOT_FONTPATH = # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the # the CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = NO # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. COLLABORATION_GRAPH = NO # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = NO # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. 
UML_LOOK = NO # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files. INCLUDE_GRAPH = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = NO # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. CALL_GRAPH = NO # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will graphical hierarchy of all classes instead of a textual one. GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. 
DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are png, jpg, or gif # If left blank png will be used. DOT_IMAGE_FORMAT = png # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. Note that doxygen if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lay further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is disabled by default, because dot on Windows does not # seem to support this out of the box. Warning: Depending on the platform used, # enabling this option may lead to badly anti-aliased labels on the edges of # a graph (i.e. 
they become hard to read). DOT_TRANSPARENT = NO # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default. DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. DOT_CLEANUP = YES #--------------------------------------------------------------------------- # Options related to the search engine #--------------------------------------------------------------------------- # The SEARCHENGINE tag specifies whether or not a search engine should be # used. If set to NO the values of all tags below this one will be ignored. SEARCHENGINE = YES ================================================ FILE: README.md ================================================ # GraphLab PowerGraph v2.2 ## UPDATE: For a significant evolution of this codebase, see GraphLab Create which is available for download at [turi.com](https://turi.com) ## History In 2013, the team that created GraphLab PowerGraph started the Seattle-based company, GraphLab, Inc. The learnings from GraphLab PowerGraph and GraphChi projects have culminated into GraphLab Create, an enterprise-class data science platform for data scientists and software engineers that can simplify building and deploying advanced machine learning models as a RESTful predictive service. In January 2015, GraphLab, Inc. was renamed to Turi. See [turi.com](https://turi.com) for more information. ## Status GraphLab PowerGraph is no longer in active development by the founding team. 
GraphLab PowerGraph is now supported by the community at [http://forum.turi.com/](http://forum.turi.com/). # Introduction GraphLab PowerGraph is a graph-based, high performance, distributed computation framework written in C++. The GraphLab PowerGraph academic project was started in 2009 at Carnegie Mellon University to develop a new parallel computation abstraction tailored to machine learning. GraphLab PowerGraph 1.0 employed shared-memory design. In GraphLab PowerGraph 2.1, the framework was redesigned to target the distributed environment. It addressed the difficulties with real-world power-law graphs and achieved unparalleled performance at the time. In GraphLab PowerGraph 2.2, the Warp System was introduced and provided a new flexible, distributed architecture around fine-grained user-mode threading (fibers). The Warp System allows one to easily extend the abstraction, to improve optimization for example, while also improving usability. GraphLab PowerGraph is the culmination of 4-years of research and development into graph computation, distributed computing, and machine learning. GraphLab PowerGraph scales to graphs with billions of vertices and edges easily, performing orders of magnitude faster than competing systems. GraphLab PowerGraph combines advances in machine learning algorithms, asynchronous distributed graph computation, prioritized scheduling, and graph placement with optimized low-level system design and efficient data-structures to achieve unmatched performance and scalability in challenging machine learning tasks. Related is GraphChi, a spin-off project separate from the GraphLab PowerGraph project. GraphChi was designed to run very large graph computations on just a single machine, by using a novel algorithm for processing the graph from disk (SSD or hard drive) enabling a single desktop computer (actually a Mac Mini) to tackle problems that previously demanded an entire cluster. 
For more information, see [https://github.com/GraphChi](https://github.com/GraphChi). # License GraphLab PowerGraph is released under the [Apache 2 license](http://www.apache.org/licenses/LICENSE-2.0.html). If you use GraphLab PowerGraph in your research, please cite our paper: ``` @inproceedings{Low+al:uai10graphlab, title = {GraphLab: A New Parallel Framework for Machine Learning}, author = {Yucheng Low and Joseph Gonzalez and Aapo Kyrola and Danny Bickson and Carlos Guestrin and Joseph M. Hellerstein}, booktitle = {Conference on Uncertainty in Artificial Intelligence (UAI)}, month = {July}, year = {2010} } ``` # Academic and Conference Papers Joseph E. Gonzalez, Yucheng Low, Haijie Gu, Danny Bickson, and Carlos Guestrin (2012). "[PowerGraph: Distributed Graph-Parallel Computation on Natural Graphs](https://www.usenix.org/conference/osdi12/technical-sessions/presentation/gonzalez)." Proceedings of the 10th USENIX Symposium on Operating Systems Design and Implementation (OSDI '12). Yucheng Low, Joseph Gonzalez, Aapo Kyrola, Danny Bickson, Carlos Guestrin and Joseph M. Hellerstein (2012). "[Distributed GraphLab: A Framework for Machine Learning and Data Mining in the Cloud](http://vldb.org/pvldb/vol5/p716_yuchenglow_vldb2012.pdf)." Proceedings of the VLDB Endowment (PVLDB). Yucheng Low, Joseph Gonzalez, Aapo Kyrola, Danny Bickson, Carlos Guestrin, and Joseph M. Hellerstein (2010). "[GraphLab: A New Parallel Framework for Machine Learning](http://arxiv.org/pdf/1006.4990v1.pdf)." Conference on Uncertainty in Artificial Intelligence (UAI). 
Li, Kevin; Gibson, Charles; Ho, David; Zhou, Qi; Kim, Jason; Buhisi, Omar; Brown, Donald E.; Gerber, Matthew, "[Assessment of machine learning algorithms in cloud computing frameworks](http://ieeexplore.ieee.org/xpl/articleDetails.jsp?reload=true&arnumber=6549501)", Systems and Information Engineering Design Symposium (SIEDS), 2013 IEEE, pp.98,103, 26-26 April 2013 [Towards Benchmarking Graph-Processing Platforms](http://sc13.supercomputing.org/sites/default/files/PostersArchive/post152.html). by Yong Guo (Delft University of Technology), Marcin Biczak (Delft University of Technology), Ana Lucia Varbanescu (University of Amsterdam), Alexandru Iosup (Delft University of Technology), Claudio Martella (VU University Amsterdam), Theodore L. Willke (Intel Corporation), in Super Computing 13 Aapo Kyrola, Guy Blelloch, and Carlos Guestrin (2012). "[GraphChi: Large-Scale Graph computation on Just a PC](https://www.usenix.org/conference/osdi12/technical-sessions/presentation/kyrola)." Proceedings of the 10th USENIX Symposium on Operating Systems Design and Implementation (OSDI '12). # The Software Stack The GraphLab PowerGraph project consists of a core API and a collection of high-performance machine learning and data mining toolkits built on top. The API is written in C++ and built on top of standard cluster and cloud technologies. Inter-process communication is accomplished over TCP-IP and MPI is used to launch and manage GraphLab PowerGraph programs. Each process is multithreaded to fully utilize the multicore resources available on modern cluster nodes. It supports reading and writing to both Posix and HDFS filesystems. ![GraphLab PowerGraph Software Stack](images/gl_os_software_stack.png "GraphLab Software Stack") GraphLab PowerGraph has a large selection of machine learning methods already implemented (see /toolkits directory in this repo). You can also implement your own algorithms on top of the graph programming API (a certain degree of C++ knowledge is required). 
GraphLab PowerGraph Feature Highlights -------------------------------------- * **Unified multicore/distributed API:** write once run anywhere * **Tuned for performance:** optimized C++ execution engine leverages extensive multi-threading and asynchronous IO * **Scalable:** Run on large cluster deployments by intelligently placing data and computation * **HDFS Integration:** Access your data directly from HDFS * **Powerful Machine Learning Toolkits:** Tackle challenging machine learning problems with ease ## Building The current version of GraphLab PowerGraph was tested on Ubuntu Linux 64-bit 10.04, 11.04 (Natty), 12.04 (Pangolin) as well as Mac OS X 10.7 (Lion) and Mac OS X 10.8 (Mountain Lion). It requires a 64-bit operating system. # Dependencies To simplify installation, GraphLab PowerGraph currently downloads and builds most of its required dependencies using CMake’s External Project feature. This also means the first build could take a long time. There are however, a few dependencies which must be manually satisfied. * On OS X: g++ (>= 4.2) or clang (>= 3.0) [Required] + Required for compiling GraphLab. * On Linux: g++ (>= 4.3) or clang (>= 3.0) [Required] + Required for compiling GraphLab. * *nix build tools: patch, make [Required] + Should come with most Mac/Linux systems by default. Recent Ubuntu version will require to install the build-essential package. * zlib [Required] + Comes with most Mac/Linux systems by default. Recent Ubuntu version will require the zlib1g-dev package. * Open MPI or MPICH2 [Strongly Recommended] + Required for running GraphLab distributed. * JDK 6 or greater [Optional] + Required for HDFS support ## Satisfying Dependencies on Mac OS X Installing XCode with the command line tools (in XCode 4.3 you have to do this manually in the XCode Preferences -> Download pane), satisfies all of these dependencies. 
## Satisfying Dependencies on Ubuntu All the dependencies can be satisfied from the repository: sudo apt-get update sudo apt-get install gcc g++ build-essential libopenmpi-dev openmpi-bin default-jdk cmake zlib1g-dev git # Downloading GraphLab PowerGraph You can download GraphLab PowerGraph directly from the Github Repository. Github also offers a zip download of the repository if you do not have git. The git command line for cloning the repository is: git clone https://github.com/graphlab-code/graphlab.git cd graphlab # Compiling and Running ``` ./configure ``` In the graphlabapi directory, will create two sub-directories, release/ and debug/ . cd into either of these directories and running make will build the release or the debug versions respectively. Note that this will compile all of GraphLab, including all toolkits. Since some toolkits require additional dependencies (for instance, the Computer Vision toolkit needs OpenCV), this will also download and build all optional dependencies. We recommend using make’s parallel build feature to accelerate the compilation process. For instance: ``` make -j4 ``` will perform up to 4 build tasks in parallel. When building in release/ mode, GraphLab does require a large amount of memory to compile with the heaviest toolkit requiring 1GB of RAM. Alternatively, if you know exactly which toolkit you want to build, cd into the toolkit’s sub-directory and running make, will be significantly faster as it will only download the minimal set of dependencies for that toolkit. For instance: ``` cd release/toolkits/graph_analytics make -j4 ``` will build only the Graph Analytics toolkit and will not need to obtain OpenCV, Eigen, etc used by the other toolkits. ## Compilation Issues If you encounter issues please post the following on the [GraphLab forum](http://forum.graphlab.com). 
* detailed description of the problem you are facing * OS and OS version * output of uname -a * hardware of the machine * output of g++ -v and clang++ -v * contents of graphlab/config.log and graphlab/configure.deps # Writing Your Own Apps There are two ways to write your own apps. * To work in the GraphLab PowerGraph source tree, (recommended) * Install and link against Graphlab PowerGraph (not recommended) ## 1: Working in the GraphLab PowerGraph Source Tree This is the best option if you just want to try using GraphLab PowerGraph quickly. GraphLab PowerGraph uses the CMake build system which enables you to quickly create a C++ project without having to write complicated Makefiles. 1. Create your own sub-directory in the apps/ directory. for example apps/my_app 2. Create a CMakeLists.txt in apps/my_app containing the following lines: project(GraphLab) add_graphlab_executable(my_app [List of cpp files space separated]) 3. Substituting the right values into the square brackets. For instance: project(GraphLab) add_graphlab_executable(my_app my_app.cpp) 4. Running "make" in the apps/ directory of any of the build directories should compile your app. If your app does not show up, try running cd [the GraphLab API directory] touch apps/CMakeLists.txt ## 2: Installing and Linking Against GraphLab PowerGraph To install and use GraphLab PowerGraph this way will require your system to completely satisfy all remaining dependencies, which GraphLab PowerGraph normally builds automatically. This path is not extensively tested and is **not recommended** You will require the following additional dependencies - libevent (>=2.0.18) - libjson (>=7.6.0) - libboost (>=1.53) - libhdfs (required for HDFS support) - tcmalloc (optional) Follow the instructions in the [Compiling] section to build the release/ version of the library. Then cd into the release/ build directory and run make install . 
This will install the following: * include/graphlab.hpp + The primary GraphLab header * include/graphlab/... + The folder containing the headers for the rest of the GraphLab library * lib/libgraphlab.a + The GraphLab static library. Once you have installed GraphLab PowerGraph you can compile your program by running: ``` g++ -O3 -pthread -lzookeeper_mt -lzookeeper_st -lboost_context -lz -ltcmalloc -levent -levent_pthreads -ljson -lboost_filesystem -lboost_program_options -lboost_system -lboost_iostreams -lboost_date_time -lhdfs -lgraphlab hello_world.cpp ``` If you have compiled with MPI support, you will also need -lmpi -lmpi++ # Tutorials See [tutorials](TUTORIALS.md) # Datasets The following are data sets links we found useful when getting started with GraphLab PowerGraph. ##Social Graphs * [Stanford Large Network Dataset (SNAP)](http://snap.stanford.edu/data/index.html) * [Laboratory for Web Algorithms](http://law.di.unimi.it/datasets.php) ##Collaborative Filtering * [Million Song dataset](http://labrosa.ee.columbia.edu/millionsong/) * [Movielens dataset GroupLens](http://grouplens.org/datasets/movielens/) * [KDD Cup 2012 by Tencent, Inc.](https://www.kddcup2012.org/) * [University of Florida sparse matrix collection](http://www.cise.ufl.edu/research/sparse/matrices/) ##Classification * [Airline on time performance](http://stat-computing.org/dataexpo/2009/) * [SF restaurants](http://missionlocal.org/san-francisco-restaurant-health-inspections/) ##Misc * [Amazon Web Services public datasets](http://aws.amazon.com/datasets) # Release Notes ##### **map_reduce_vertices/edges and transform_vertices/edges are not parallelized on Mac OS X** These operations currently rely on OpenMP for parallelism. On OS X 10.6 and earlier, gcc 4.2 has several OpenMP bugs and is not stable enough to use reliably. On OS X 10.7, the clang ++ compiler does not yet support OpenMP. 
##### **map_reduce_vertices/edges and transform_vertices/edges use a lot more processors than what was specified in –ncpus** This is related to the question above. While there is a simple temporary solution (omp_set_num_threads), we intend to properly resolve the issue by not using openMP at all. ##### **Unable to launch distributed GraphLab when each machine has multiple network interfaces** The communication initialization currently takes the first non-localhost IP address as the machine’s IP. A more reliable solution will be to use the hostname used by MPI. ================================================ FILE: TUTORIALS.md ================================================ # GraphLab PowerGraph Tutorials ##Table of Contents * [Deploying on AWS EC2 Cluster](#ec2) * [Deploying in a Cluster](#cluster) * [Deploying on a single multicore machine](#multicore) * [Benchmarking on AWS EC2](#benchmarking) * [Fine tuning GraphLab PowerGraph performance](#perf_tuning) # Deploying in AWS EC2 Cluster ## Step 0: Requirements * You should have Amazon EC2 account eligible to run on us-east-1a zone. * Find out using the Amazon AWS console your AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY (under your account name on the top right corner-> security credentials -> access keys) * You should have a keypair attached to the zone you are running on (in our example us-east-1a) as explained here. You will need to know your keypair name (graphlabkey in our example), and the location of the private key (~/.ssh/graphlabkey.pem in our example). * Install [boto](https://pypi.python.org/pypi/boto/). This is the AWS Python client. To install, run: ``` sudo pip install boto ``` * Download and install GraphLab PowerGraph using the instructions in the [README.md](README.md). 
## Step 1: Environment Setup Edit your .bashrc or .bash_profile or .profile files (remember to source it after editing, using the bash command “source <filename>”) ``` export AWS_ACCESS_KEY_ID=[ Your access key ] export AWS_SECRET_ACCESS_KEY=[ Your access key secret ] ``` ## Step 2: Start the cluster ``` cd ~/graphlabapi/scripts/ec2 ./gl-ec2 -i ~/.ssh/graphlab.pem -k graphlabkey -s 1 launch launchtest ``` (In the above command, we created a 2-node cluster in us-east-1a zone. -s is the number of slaves, launch is the action, and launchtest is the name of the cluster) ## Step 3: Update GraphLab PowerGraph ``` ./gl-ec2 -i ~/.ssh/graphlab.pem -k graphlabkey update launchtest ``` ## Step 4: Run Alternating Least Squares Demo This step runs ALS (alternating least squares) in a cluster using small netflix subset. It first downloads the data from the web: [http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.train](http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.train) and [http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.validate](http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.validate), copy it into HDFS, and runs 5 alternating least squares iterations: ``` ./gl-ec2 -i ~/.ssh/graphlab.pem -k graphlabkey als_demo launchtest ``` After the run is completed, login to the master node and view the output files in the folder ~/graphlabapi/release/toolkits/collaborative_filtering/ The algorithm and exact format is explained in the API docs. ## Step 5: Shutdown the Cluster ``` ./gl-ec2 -i ~/.ssh/graphlab.pem -k graphlabkey destroy launchtest ``` ## Other Useful Commands: Login into the master node using ``` ./gl-ec2 -i ~/.ssh/graphlab.pem -s 1 login launchtest ``` # Deploying in a Cluster ## Step 0: Install GraphLab PowerGraph on one of your cluster nodes. Install GraphLab PowerGraph, using instructions in the [README.md](README.md), on your master node (one of your cluster machines). 
## Step 1: Copy GraphLab PowerGraph files to all machines. 1) Create a file in your home directory called “machines” with the names of all the MPI nodes that participate in the computation. For example: ``` cat ~/machines mynode1.some.random.domain mynode2.some.random.domain ... mynode18.some.random.domain ``` 2) Verify you have the machines files from section 1) in your root folder of all of the machines. 3) You will need to setup password-less SSH between the master node and all other machines. Verify it is possible to ssh without password between any pairs of machines. These [instructions](http://www.linuxproblem.org/art_9.html) explain how to setup ssh without passwords. Before proceeding, verify that this is setup correctly; check that the following connects to the remote machine without prompting for a password: ``` # from machine mynode1.some.random.domain ssh mynode2.some.random.domain ``` 4) On the node you installed GraphLab on, run the following commands to copy GraphLab files to the rest of the machines: ``` cd ~/graphlab/release/toolkits ~/graphlab/scripts/mpirsync cd ~/graphlab/deps/local ~/graphlab/scripts/mpirsync ``` This step will only work if the file you created in step 1 was named "machines" and located in your home directory. In order for mpirsync to run properly all machines must have all network ports open. ## Step 2a: Run PageRank on a synthetic graph This step runs the [PageRank](http://en.wikipedia.org/wiki/PageRank) algorithm on a synthetic generated graph of 100,000 nodes. It spawns two GraphLab mpi instances (-n 2). ``` mpiexec -n 2 -hostfile ~/machines /path/to/pagerank --powerlaw=100000 ``` ## Step 2: Run GraphLab PowerGraph ALS using subset of Netflix data This step runs ALS (alternating least squares) in a cluster using small netflix subset. 
It first downloads an anonymized, synthetic Netflix dataset from the web: [http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.train](http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.train) and [http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.validate](http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.validate), and runs 5 alternating least squares iterations. After the run is completed, you can login into any of the nodes and view the output files in the folder ~/graphlab/release/toolkits/collaborative_filtering/ ``` cd /some/ns/folder/ mkdir smallnetflix cd smallnetflix/ wget http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.train wget http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.validate ``` Now run GraphLab: ```` mpiexec -n 2 -hostfile ~/machines /path/to/als --matrix /some/ns/folder/smallnetflix/ --max_iter=3 --ncpus=1 --minval=1 --maxval=5 --predictions=out_file ``` Where -n is the number of MPI nodes, and –ncpus is the number of deployed cores on each MPI node. machines is a file which includes a list of the machines you like to deploy on (each machine in a new line) Note: this section assumes you have a network storage (ns) folder where the input can be stored. Alternatively, you can split the input into several disjoint files, and store the subsets on the cluster machines. Note: Don’t forget to change /path/to/als and /some/ns/folder to your actual folder path! Note: For mpich2, use -f instead of -hostfile. ## Step 3: [Fine tuning graphlab deployment](#perf_tuning). 
## Errors and their resolution: ### Error: ``` /mnt/info/home/daroczyb/als: error while loading shared libraries: libevent_pthreads-2.0.so.5: cannot open shared object file: No such file or directory ``` **Solution:** You should define LD_LIBRARY_PATH to point to the location of libevent_pthreads, this is done with the -x mpi command, for example: ``` mpiexec --hostfile machines -x LD_LIBRARY_PATH=/home/daroczyb/graphlab/deps/local/lib/ /mnt/info/home/daroczyb/als /mnt/info/home/daroczyb/smallnetflix_mm.train ``` ### Error: ``` mnt/info/home/daroczyb/als: error while loading shared libraries: libjvm.so: cannot open shared object file: No such file or directory ``` **Solution:** Point LD_LIBRARY_PATH to the location of libjvm.so using the -x mpi command: ``` mpiexec --hostfile machines -x LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/daroczyb/graphlab/deps/local/lib/:/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/amd64/server/ /mnt/info/home/daroczyb/als /mnt/info/home/daroczyb/smallnetflix_mm.train ``` ### Error: ``` problem with execution of /graphlab/release/toolkits/collaborative_filtering/als on debian1: [Errno 2] No such file or directory ``` **Solution:** You should verify the executable is found on the same path on all machines. ### Error: a prompt asking for password when running mpiexec **Solution:** Use the following [instructions](http://www.linuxproblem.org/art_9.html) to allow connection with a public/private key pair (no password). 
### Error: ``` Exception in thread "main" java.lang.IllegalArgumentException: Wrong FS: hdfs://[domain]:9000/user/[user_name]/data.txt, expected: file:/// at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:381) at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:55) at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:307) at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:842) at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:867) at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:487) Call to org.apache.hadoop.fs.FileSystem::listStatus failed! WARNING: distributed_graph.hpp(load_from_hdfs:1889): No files found matching hdfs://[domain]:9000/user/[user_name]/data.txt ``` **Solution:** Verify your classpath includes all required hadoop folders. ### Error: Just after TCP Communication layer is constructed: ``` BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES, EXITCODE: 11, CLEANING UP REMAINING PROCESSES, YOU CAN IGNORE THE BELOW CLEANUP MESSAGES ``` or: ``` [xyzserver:22296] *** Process received signal *** mpiexec noticed that process rank 0 with PID 22296 on node xyzserver exited on signal 11 (Segmentation fault). ``` **Solution:** Check that all machines have access to, or are using, the same binary. # Deployment on a single multicore machine ## Preliminaries: ## Step 0: Install GraphLab on one of your cluster nodes. Using the instructions [here](/projects/source.html) on your master node (one of your cluster machines), except invoke the configure script with the '--no_mpi' flag. Don’t forget to use ``` ./configure --no_mpi ``` when configuring GraphLab. ## Step 1: Run GraphLab ALS This step runs ALS (alternating least squares) on a single machine using a small Netflix subset. It first downloads the data from the web, then runs 5 alternating least squares iterations.
After the run is completed, the output files will be created in the running folder (the folder graphlab/release/toolkits/collaborative_filtering/) ``` cd graphlab/release/toolkits/collaborative_filtering/ mkdir smallnetflix cd smallnetflix/ wget http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.train wget http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.validate cd .. ``` Now run GraphLab: ``` ./als --matrix ./smallnetflix/ --max_iter=5 --ncpus=1 --predictions=out_file ``` Where --ncpus is the number of deployed cores. # Benchmarking on AWS EC2 A commonly repeating task is evaluation of GraphLab performance and scaling properties on a cluster. To help jump start benchmarking we have created this tutorial. ## Step 0: Requirements 1. You should have an Amazon EC2 account eligible to run in the us-west zone. 2. Find out using the Amazon AWS console your AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY (under your account name on the top right corner -> security credentials -> access keys) 3. You should have a keypair attached to the zone you are running on (in our example us-west) as explained [here](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html). You will need to know your keypair name (amazonec2 in our example), and the location of the private key (~/.ssh/amazonec2.pem in our example). 4. Install boto. This is the AWS Python client. To install, run: `sudo pip install boto`. 5. Download and install GraphLab using the instructions [here](/projects/source.html). ## Step 1: Recommended setting We recommend using high performance computing instances (like cc2.8xlarge) since we observed significantly improved performance, especially related to variation in cluster load and network utilization. The scripts also allow using regular instances. To compensate for unexpected EC2 load, we recommend repeating each experiment a few times and computing the average.
## Step 2: Environment Setup Edit your .bashrc or .bash_profile or .profile files (remember to source it after editing, using the bash command “source ”) ``` export AWS_ACCESS_KEY_ID=[ Your access key ] export AWS_SECRET_ACCESS_KEY=[ Your access key secret ] ``` ## Step 3: configure benchmarking Edit the [benchmark_ec2.sh](https://github.com/graphlab-code/graphlab/blob/master/scripts/ec2/benchmark_ec2.sh) script found under graphlab/scripts/ec2 1. Select the requested algorithms of the following options: ``` ALS=1 # alternating least squares SVD=1 # singular value decomposition PR=1 # pagerank ``` (Setting an algorithm to 0 will disable its run). 2. Select the number of slaves (any number between 0 to n) by setting the MAX_SLAVES variable. 3. Select the number of experiment repeats (any number between 0 to n) by setting the MAX_RETRY variable. The benchmarking script, spawns an ec2 cluster of size n machines, and then tests the requested algorithm using 0, 1, … n-1 slaves. Each experiment is repeated MAX_RETRY times. ### Step 3: Perform benchmarking ``` cd ~/graphlabapi/scripts/ec2 ./benchmark_ec2.sh ``` It is advised to redirect the benchmarking output to file, for example on bash: ``` ./benchmark_ec2 > output 2>&1 ``` ### Step 4: Processing the results For detecting final runtime for ALS/SVD ``` grep "Runtime" output ``` For detecting final runtime for PR: ``` grep "Finished Running" output ``` You will need to manually compute the average runtime for each case. A recommended metric to use is the “speedup” curve, which is the time for executing on a single machine divided by the time executing on k machines. The optimal result is linear speedup, namely running on k machines speeds up the algorithm k times vs. running on a single machine. ### Step 5: behind the scenes Here is a more detailed explanation of the benchmarking process. 
The benchmarking calls the gl-ec2 script, which in turn calls the [gl_ec2.py](https://github.com/graphlab-code/graphlab/blob/master/scripts/ec2/gl_ec2.py) script. 1. The “launch” command to start a graphlab cluster with X machines. 2. The “update” command to get the latest version of graphlab from git, recompile it, and disseminate the binary to the slaves. 3. The “als_demo”, “svd_demo”, “pagerank_demo” commands benchmark the ALS/SVD/PR algorithms. Each first downloads a dataset from the web and then calls graphlab with the right command lines to issue a run on the downloaded dataset. For PR we use the [LiveJournal](http://snap.stanford.edu/data/soc-LiveJournal1.html) dataset. For ALS/SVD we use a [netflix like synthetic sample](http://www.select.cs.cmu.edu/code/graphlab/datasets/smallnetflix_mm.train). 4. In case you would like to benchmark a different dataset, you can edit the dataset URL in the gl_ec2.py example. 5. In case you would like to benchmark a different algorithm, you can add an additional youralgo_demo section into the gl_ec2.py script. 6. In case you would like to benchmark a regular instance, simply change the following line in gl_ec2.py from ``` ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 -a hpc -s $MAX_SLAVES -t cc2.8xlarge launch hpctest ``` to: ``` ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 -s $MAX_SLAVES -t m1.xlarge launch hpctest ``` ### Advanced topics. In case you would like to work in a different ec2 region (than the default us-west): For the us-east region, these are the provided AMIs: Standard: ami-31360458, high performance: ami-39360450. You should 1. add the following line just before: [gl_ec2.py](https://github.com/graphlab-code/graphlab/blob/master/scripts/ec2/gl_ec2.py#L223) ``` opts.ami = "ami-31360458" ``` 2.
run with the additional command line argument: ``` -r us-east-1 ``` ### Support If you encounter any problem when trying to run this benchmarking, feel free to post on [forum.graphlab.com](http://forum.graphlab.com) # Fine tuning GraphLab PowerGraph performance This section contains tips and examples on how to set up GraphLab properly on your cluster and how to squeeze performance. ## 0: Compile in release Verify you compiled graphlab in the release subfolder (and not in the debug subfolder). Compiling in release may speed execution up to x10 times! Tip: Always compile in release when testing performance. ## 1: Understanding input graph loading GraphLab PowerGraph has built-in parallel loading of the input graph. However, for efficient parallel loading, the input file should be split into multiple disjoint sub-files. When using a single input file, the graph loading becomes serial (which is bad!). Each MPI instance has a single loader of the input graph attached to it (no matter how many cpus are used by that MPI instance). Tip: Always split your input file into at least as many parts as the number of MPI processes you are using. ## 2: Verify MPI is working correctly You can test your MPI setup as follows: 1. Compile the release/demoapps/rpc subfolder (using “cd release/demoapps/rpc/; make”). Copy the files generated by the compile to all machines. 2. Run: ``` mpiexec -n 2 --hostfile ~/machines /home/ubuntu/graphlab/release/demoapps/rpc/rpc_example1 ``` As part of the output, you should see something like this: ``` TCP Communication layer constructed. TCP Communication layer constructed. 10 5 plus 1 is : 6 11 plus 1 is : 12 ``` If you get something else, please report an error as explained below. ## 3: Fine tuning of the partitioning. Prior to program execution, the graph is first loaded into memory and partitioned across the different cluster machines. It is possible to try different partitioning strategies.
This is done using the following flags: ``` --graph_opts="ingress=oblivious" ``` or ``` --graph_opts="ingress=grid" # works for power of 2 sized cluster i.e. 2,4,8,.. machines ``` For different graphs, different partitioning methods may give different performance gains. ## 4: Setting ncpus The --ncpus option lets you set the number of cores used to perform computation. Prior to 2.1.4644 this defaults to 2. After 2.1.4644, this defaults to #cores - 2. When run in the distributed setting, the maximum number this should be set to is #cores - 2 since 2 cores should be reserved for communication. ================================================ FILE: apps/CMakeLists.txt ================================================ project(GraphLab) # link_libraries(${Boost_LIBRARIES}) # link_libraries(${GraphLab_LIBRARIES}) macro(add_all_subdirectories retval curdir) file(GLOB sub-dir RELATIVE ${curdir} *) set(list_of_dirs "") foreach(dir ${sub-dir}) if(IS_DIRECTORY ${curdir}/${dir}) STRING(SUBSTRING ${dir} 0 1 firstchar) if(${firstchar} STREQUAL "." OR ${firstchar} STREQUAL "_" ) else(${firstchar} STREQUAL "." OR ${firstchar} STREQUAL "_") set(list_of_dirs ${list_of_dirs} ${dir}) message(STATUS "Detected App: " ${dir}) add_subdirectory(${dir}) endif() endif() endforeach() set(${retval} ${list_of_dirs}) endmacro() add_all_subdirectories(retval, ${CMAKE_CURRENT_SOURCE_DIR}) ================================================ FILE: apps/cascades/CMakeLists.txt ================================================ project(cascades) add_graphlab_executable(cascades cascades.cpp) ================================================ FILE: apps/cascades/cascades.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include #include #include #include double infection_chance; double recovery_chance; enum Status {INFECTED, SUSCEPTIBLE, RECOVERED}; // The vertex data is its status (S, I, or R) typedef Status vertex_data_type; // infected_status counts the number of infected neighbors, since // the number of infected neighbors determines how likely a susceptible node is // to be infected struct infected_status: public graphlab::IS_POD_TYPE { int value; vertex_data_type status; infected_status() { value = 0; status = INFECTED; } infected_status& operator+=(const infected_status& other) { if (other.status == INFECTED) { this->value++; } return *this; } }; typedef infected_status gather_type; // The graph type is determined by the vertex and edge data types typedef graphlab::distributed_graph graph_type; bool line_parser(graph_type& graph, const std::string& filename, const std::string& textline) { std::stringstream strm(textline); graphlab::vertex_id_type vid; char label; // first entry in the line is a vertex ID strm >> vid; // next entry is their status (S, I, or R) strm >> label; vertex_data_type statusLabel; if (label == 'S') { statusLabel = SUSCEPTIBLE; } else if (label == 'I') { statusLabel = INFECTED; } else { statusLabel = RECOVERED; } // insert this vertex with its label graph.add_vertex(vid, statusLabel); // while there are elements in the line, continue to read until we fail while(1) { graphlab::vertex_id_type other_vid; strm >> other_vid; if (strm.fail()) { break; } graph.add_edge(vid, other_vid); } return true; } class cascades: public graphlab::ivertex_program, 
public graphlab::IS_POD_TYPE { public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { // figure out which data to get from the edge. bool isEdgeSource = (vertex.id() == edge.source().id()); vertex_data_type neighbor_status = isEdgeSource ? edge.target().data() : edge.source().data(); // create infected_status and add neighbor's status to it. infected_status status; status.status = neighbor_status; status.value = 1; return status; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { vertex_data_type old_data = vertex.data(); vertex_data_type result = old_data; double random_value; // if vertex.data == RECOVERED, don't do anything // if vertex.data == INFECTED, roll on recovery_chance to see if recovery // occurs // if vertex.data == SUSCEPTIBLE, then do (total) dice rolls (each time comparing // the result to infection_chance). if any of them show up as positive, // set vertex.data to INFECTED. 
if (old_data != RECOVERED) { if (old_data == INFECTED) { random_value = ((double)rand())/RAND_MAX; if (random_value <= recovery_chance) { result = RECOVERED; } } else if (old_data == SUSCEPTIBLE) { for (int i = 0; i < total.value; i++) { random_value = ((double)rand())/RAND_MAX; if (random_value <= infection_chance) { result = INFECTED; break; } } } } vertex.data() = result; if (result == INFECTED) { context.signal(vertex); } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; struct cascades_writer{ std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; Status status = v.data(); char vertex_data; // Convert the status back into a char if (status == INFECTED) { vertex_data = 'I'; } else if (status == SUSCEPTIBLE) { vertex_data = 'S'; } else { vertex_data = 'R'; } strm << v.id() << "\t" << vertex_data << "\n"; return strm.str(); } std::string save_edge (graph_type::edge_type e) { return ""; } }; int main(int argc, char** argv) { srand((unsigned) time(0)); // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("Label Propagation algorithm."); std::string graph_dir; std::string execution_type = "synchronous"; double recovery = -1; double infection = -1; size_t iterations = -1; clopts.attach_option("graph", graph_dir, "The graph file. Required "); clopts.add_positional("graph"); clopts.attach_option("execution", execution_type, "Execution type (synchronous or asynchronous)"); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant pagerank to a " "sequence of files with prefix saveprefix"); clopts.attach_option("recovery chance", recovery, "Chance of recovery for an infected individual at each step. 
Required."); clopts.attach_option("infection chance", infection, "Chance of infection for a susceptible individual per person at each step. Required."); clopts.attach_option("iterations", iterations, "If set, will force the use of synchronous engine overriding any engine option set by the --engine parameter. Runs cascades for a fixed number of iterations. Also overrides the max_iterations option in the engine."); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } if (graph_dir == "") { dc.cout() << "Graph not specified. Cannot continue"; return EXIT_FAILURE; } if (recovery == -1) { dc.cout() << "Recovery chance not specified. Cannot continue"; return EXIT_FAILURE; } if (infection == -1) { dc.cout() << "Infection chance not specified. Cannot continue"; return EXIT_FAILURE; } infection_chance = infection; recovery_chance = recovery; if (iterations != -1) { // make sure this is the synchronous engine dc.cout() << "--iterations set. Forcing Synchronous engine, and running for " << iterations << " iterations." << std::endl; clopts.get_engine_args().set_option("type", "synchronous"); clopts.get_engine_args().set_option("max_iterations", iterations); } // Build the graph ---------------------------------------------------------- graph_type graph(dc); dc.cout() << "Loading graph using line parser" << std::endl; graph.load(graph_dir, line_parser); // must call finalize before querying the graph graph.finalize(); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; graphlab::omni_engine engine(dc, graph, execution_type, clopts); engine.signal_all(); engine.start(); const float runtime = engine.elapsed_seconds(); dc.cout() << "Finished Running engine in " << runtime << " seconds." 
<< std::endl; if (saveprefix != "") { graph.save(saveprefix, cascades_writer(), false, // do not gzip true, //save vertices false); // do not save edges } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } ================================================ FILE: apps/concomp/CMakeLists.txt ================================================ project(example) add_graphlab_executable(concomp concomp.cpp) ================================================ FILE: apps/concomp/concomp.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #include // The vertex data is just the id value typedef graphlab::vertex_id_type vertex_data_type; typedef graphlab::vertex_id_type edge_data_type; typedef double gather_type; // The graph type is determined by the vertex and edge data types typedef graphlab::distributed_graph graph_type; /* * A simple function used by graph.transform_vertices(init_vertex); * to initialize the vertex data to the vertex id */ void init_vertex(graph_type::vertex_type& vertex) { vertex.data() = -1; } struct min_combiner : public graphlab::IS_POD_TYPE { vertex_data_type value; min_combiner() { value = -1; } min_combiner& operator+=(const min_combiner& other) { if (other.value < value) { value = other.value; } return *this; } }; class concomp : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { // set changed to determine which edges to scatter on bool changed; // local copy of the message value vertex_data_type message_value; public: // Receive inbound message (minimum data of adjacent vertices) void init(icontext_type& context, const vertex_type& vertex, const message_type& message) { // message.value == 4294967295 on first run, so init message_value to vertex data. if (message.value == 4294967295) { message_value = vertex.id(); } else { // else, set the local copy to the message parameter. message_value = message.value; } } edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } // Change the vertex data if any of its neighbors have a lower data value. void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { // mark if values differ to determine which edges to scatter on. if (message_value < vertex.data()) { changed = true; vertex.data() = message_value; } else { changed = false; } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { // If the vertex data changed, scatter along all edges. Otherwise stop. 
if (changed) { return graphlab::ALL_EDGES; } else { return graphlab::NO_EDGES; } } // Scatter to scatter_edges edges with the new message value. void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { bool isEdgeSource = (vertex.id() == edge.source().id()); bool hasSameData = isEdgeSource ? (vertex.data() == edge.target().data()) : (vertex.data() == edge.source().data()) ; if (!hasSameData) { min_combiner combiner; combiner.value = message_value; context.signal(isEdgeSource ? edge.target() : edge.source(), combiner); } } }; /* We want to save the final graph so we define a write which will be * used in graph.save("path/prefix", concomp_writer()) to save the graph. */ struct concomp_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "\t" << v.data() << "\n"; return strm.str(); } std::string save_edge(graph_type::edge_type e) { return ""; } }; // end of concomp writer int main(int argc, char** argv) { // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("Connected Components algorithm."); std::string graph_dir; std::string format = "snap"; std::string execution_type = "synchronous"; clopts.attach_option("graph", graph_dir, "The graph file. Required "); clopts.add_positional("graph"); clopts.attach_option("format", format, "The graph file format"); clopts.attach_option("execution", execution_type, "Execution type (synchronous or asynchronous)"); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant pagerank to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." 
<< std::endl; return EXIT_FAILURE; } if (graph_dir == "") { dc.cout() << "Graph not specified. Cannot continue"; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc, clopts); dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); // must call finalize before querying the graph graph.finalize(); graph.transform_vertices(init_vertex); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; graphlab::omni_engine engine(dc, graph, execution_type, clopts); min_combiner initial_message; initial_message.value = -1; engine.signal_all(initial_message); engine.start(); const float runtime = engine.elapsed_seconds(); dc.cout() << "Finished Running engine in " << runtime << " seconds." << std::endl; if (saveprefix != "") { graph.save(saveprefix, concomp_writer(), false, // do not gzip true, // save vertices false); // do not save edges } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } ================================================ FILE: apps/example/CMakeLists.txt ================================================ project(example) add_graphlab_executable(hello_world hello_world.cpp) ================================================ FILE: apps/example/hello_world.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. 
See the License for the specific language * governing permissions and limitations under the License. * * */ #include // The vertex data is just the pagerank value (a float) typedef float vertex_data_type; // There is no edge data in the pagerank application typedef float edge_data_type; typedef float message_type; // The graph type is determined by the vertex and edge data types typedef graphlab::distributed_graph graph_type; /* * A simple function used by graph.transform_vertices(init_vertex); * to initialize the vertex data. */ void init_vertex(graph_type::vertex_type& vertex) { vertex.data() = vertex.id(); } struct min_combiner { graphlab::vertex_id_type v; min_combiner& operator+=(const min_combiner& other) { v = std::min(v, other.v); return *this; } }; class concomp : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { bool changed; public: float gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { float edge_data = edge.source().data(); float vertex_data = vertex.data(); std::cout << "current vertex id: " << vertex.id() << " data: " << vertex.data() << "\n"; std::cout << "\tedge vertex id: " << edge.source().id() << " data: " << edge_data << "\n"; if (edge_data < vertex_data) { std::cout << "returning edge data: " << edge_data << "\n"; return edge_data; } else { std::cout << "returning vertex data: " << vertex_data << "\n"; return vertex_data; } } void apply(icontext_type& context, vertex_type& vertex, const gather_type& smallest) { std::cout << "vertex id: " << vertex.id() << " data: " << vertex.data() << "\n"; std::cout << "smallest: " << smallest << "\n"; if (smallest < vertex.data()) { vertex.data() = smallest; changed = true; } else { changed = false; } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { if (changed) { return graphlab::ALL_EDGES; } else { return graphlab::NO_EDGES; } } /* The scatter function just signal adjacent pages */ void scatter(icontext_type& context, 
const vertex_type& vertex, edge_type& edge) const { context.signal(edge.target()); } }; /* We want to save the final graph so we define a write which will be * used in graph.save("path/prefix", concomp_writer()) to save the graph. */ struct concomp_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "\t" << v.data() << "\n"; return strm.str(); } std::string save_edge(graph_type::edge_type e) { return ""; } }; // end of concomp writer int main(int argc, char** argv) { // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("PageRank algorithm."); std::string graph_dir; std::string format = "snap"; clopts.attach_option("graph", graph_dir, "The graph file. Required "); clopts.add_positional("graph"); clopts.attach_option("format", format, "The graph file format"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } if (graph_dir == "") { dc.cout() << "Graph not specified. Cannot continue"; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc, clopts); dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); // must call finalize before querying the graph graph.finalize(); graph.transform_vertices(init_vertex); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; graphlab::omni_engine engine(dc, graph, "synchronous", clopts); engine.signal_all(); engine.start(); const float runtime = engine.elapsed_seconds(); dc.cout() << "Finished Running engine in " << runtime << " seconds." 
<< std::endl; graph.save("output" + graph_dir + ".txt", concomp_writer(), false, // do not gzip true, // save vertices false); // do not save edges } ================================================ FILE: apps/label_propagation/CMakeLists.txt ================================================ project(label_propagation) add_graphlab_executable(label_propagation label_propagation.cpp) ================================================ FILE: apps/label_propagation/label_propagation.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #include struct label_counter { std::map label_count; label_counter() { } label_counter& operator+=(const label_counter& other) { for ( std::map::const_iterator iter = other.label_count.begin(); iter != other.label_count.end(); ++iter ) { label_count[iter->first] += iter->second; } return *this; } void save(graphlab::oarchive& oarc) const { oarc << label_count; } void load(graphlab::iarchive& iarc) { iarc >> label_count; } }; // The vertex data is its label typedef std::string vertex_data_type; typedef label_counter gather_type; // The graph type is determined by the vertex and edge data types typedef graphlab::distributed_graph graph_type; bool line_parser(graph_type& graph, const std::string& filename, const std::string& textline) { std::stringstream strm(textline); graphlab::vertex_id_type vid; std::string label; // first entry in the line is a vertex ID strm >> vid; strm >> label; // insert this vertex with its label graph.add_vertex(vid, label); // while there are elements in the line, continue to read until we fail while(1){ graphlab::vertex_id_type other_vid; strm >> other_vid; if (strm.fail()) break; graph.add_edge(vid, other_vid); } return true; } class labelpropagation : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { bool changed; public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { // figure out which data to get from the edge. bool isEdgeSource = (vertex.id() == edge.source().id()); std::string neighbor_label = isEdgeSource ? edge.target().data() : edge.source().data(); // make a label_counter and place the neighbor data in it label_counter counter; counter.label_count[neighbor_label] = 1; // gather_type is a label counter, so += will add neighbor counts to the // label_count map. 
return counter; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { int maxCount = 0; std::string maxLabel = vertex.data(); // Figure out which label of the vertex's neighbors' labels is most common for ( std::map::const_iterator iter = total.label_count.begin(); iter != total.label_count.end(); ++iter ) { if (iter->second > maxCount) { maxCount = iter->second; maxLabel = iter->first; } } // if maxLabel differs to vertex data, mark vertex as changed and update // its data. if ((vertex.data()).compare(maxLabel) != 0) { changed = true; vertex.data() = maxLabel; } else { changed = false; } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { // if vertex data changes, scatter to all edges. if (changed) { return graphlab::ALL_EDGES; } else { return graphlab::NO_EDGES; } } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { bool isEdgeSource = (vertex.id() == edge.source().id()); context.signal(isEdgeSource ? edge.target() : edge.source()); } }; struct labelpropagation_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "\t" << v.data() << "\n"; return strm.str(); } std::string save_edge (graph_type::edge_type e) { return ""; } }; int main(int argc, char** argv) { // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("Label Propagation algorithm."); std::string graph_dir; std::string execution_type = "synchronous"; clopts.attach_option("graph", graph_dir, "The graph file. 
Required "); clopts.add_positional("graph"); clopts.attach_option("execution", execution_type, "Execution type (synchronous or asynchronous)"); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant pagerank to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } if (graph_dir == "") { dc.cout() << "Graph not specified. Cannot continue"; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc); dc.cout() << "Loading graph using line parser" << std::endl; graph.load(graph_dir, line_parser); // must call finalize before querying the graph graph.finalize(); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; graphlab::omni_engine engine(dc, graph, execution_type, clopts); engine.signal_all(); engine.start(); const float runtime = engine.elapsed_seconds(); dc.cout() << "Finished Running engine in " << runtime << " seconds." 
<< std::endl; if (saveprefix != "") { graph.save(saveprefix, labelpropagation_writer(), false, // do not gzip true, //save vertices false); // do not save edges } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } ================================================ FILE: cmake/FindAnt.cmake ================================================ # - Try to find Ant find_file(ANT_EXEC NAMES ant ant.sh ant.bat PATHS $ENV{ANT_HOME}/bin) INCLUDE(FindPackageHandleStandardArgs) FIND_PACKAGE_HANDLE_STANDARD_ARGS(ANT DEFAULT_MSG ANT_EXEC) MARK_AS_ADVANCED(ANT_EXEC) ================================================ FILE: cmake/FindBoost.cmake ================================================ # - Try to find Boost include dirs and libraries # Usage of this module as follows: # # SET(Boost_USE_STATIC_LIBS ON) # SET(Boost_USE_MULTITHREAD OFF) # FIND_PACKAGE( Boost 1.34.1 COMPONENTS date_time filesystem iostreams ... ) # # The Boost_ADDITIONAL_VERSIONS variable can be used to specify a list of # boost version numbers that should be taken into account when searching # for the libraries. Unfortunately boost puts the version number into the # actual filename for the libraries, so this might be needed in the future # when new Boost versions are released. # # Currently this module searches for the following version numbers: # 1.33, 1.33.0, 1.33.1, 1.34, 1.34.0, 1.34.1, 1.35, 1.35.0, 1.35.1, 1.36, # 1.36.0, 1.36.1 # # The components list needs to be the actual names of boost libraries, that is # the part of the actual library files that differ on different libraries. So # its "date_time" for "libboost_date_time...". Anything else will result in # errors # # You can provide a minimum version number that should be used. If you provide this # version number and specify the REQUIRED attribute, this module will fail if it # can't find the specified or a later version. 
If you specify a version number this is
# automatically put into the considered list of version numbers and thus doesn't need
# to be specified in the Boost_ADDITIONAL_VERSIONS variable
#
# Variables used by this module, they can change the default behaviour and need to be set
# before calling find_package:
#   Boost_USE_MULTITHREAD          Can be set to OFF to use the non-multithreaded
#                                  boost libraries. Defaults to ON.
#   Boost_USE_STATIC_LIBS          Can be set to ON to force the use of the static
#                                  boost libraries. Defaults to OFF.
#   Boost_ADDITIONAL_VERSIONS      A list of version numbers to use for searching
#                                  the boost include directory. The default list
#                                  of version numbers is:
#                                  1.33, 1.33.0, 1.33.1, 1.34, 1.34.0, 1.34.1,
#                                  1.35, 1.35.0, 1.35.1, 1.36, 1.36.0, 1.36.1
#                                  If you want to look for an older or newer
#                                  version set this variable to a list of
#                                  strings, where each string contains a number, i.e.
#                                  SET(Boost_ADDITIONAL_VERSIONS "0.99.0" "1.35.0")
#   BOOST_ROOT or BOOSTROOT        Preferred installation prefix for searching for Boost,
#                                  set this if the module has problems finding the proper Boost installation
#   BOOST_INCLUDEDIR               Set this to the include directory of Boost, if the
#                                  module has problems finding the proper Boost installation
#   BOOST_LIBRARYDIR               Set this to the lib directory of Boost, if the
#                                  module has problems finding the proper Boost installation
#
# The last three variables are available also as environment variables
#
#
# Variables defined by this module:
#
#   Boost_FOUND                    System has Boost, this means the include dir was found,
#                                  as well as all the libraries specified in the COMPONENTS list
#   Boost_INCLUDE_DIRS             Boost include directories, not cached
#   Boost_INCLUDE_DIR              This is almost the same as above, but this one is cached and may be
#                                  modified by advanced users
#   Boost_LIBRARIES                Link these to use the Boost libraries that you specified, not cached
#   Boost_LIBRARY_DIRS             The path to where the Boost library files are.
#   Boost_VERSION                  The version number of the boost libraries that have been found,
#                                  same as in version.hpp from Boost
#   Boost_LIB_VERSION              The version number in filename form as its appended to the library filenames
#   Boost_MAJOR_VERSION            major version number of boost
#   Boost_MINOR_VERSION            minor version number of boost
#   Boost_SUBMINOR_VERSION         subminor version number of boost
#   Boost_LIB_DIAGNOSTIC_DEFINITIONS  Only set on windows. Can be used with add_definitions
#                                  to print diagnostic information about the automatic
#                                  linking done on windows.
# For each component you list the following variables are set.
# ATTENTION: The component names need to be in lower case, just as the boost
# library names however the cmake variables use upper case for the component
# part. So you'd get Boost_SERIALIZATION_FOUND for example.
#
#   Boost_${COMPONENT}_FOUND             True IF the Boost library "component" was found.
#   Boost_${COMPONENT}_LIBRARY           The absolute path of the Boost library "component".
#   Boost_${COMPONENT}_LIBRARY_DEBUG     The absolute path of the debug version of the
#                                        Boost library "component".
#   Boost_${COMPONENT}_LIBRARY_RELEASE   The absolute path of the release version of the
#                                        Boost library "component"
#
#  Copyright (c) 2006-2008 Andreas Schneider
#  Copyright (c) 2007      Wengo
#  Copyright (c) 2007      Mike Jackson
#  Copyright (c) 2008      Andreas Pakulat
#
#  Redistribution AND use is allowed according to the terms of the New
#  BSD license.
#  For details see the accompanying COPYING-CMAKE-SCRIPTS file.
#

# NOTE(review): vendored third-party find module (modified upstream CMake
# FindBoost); code below is kept unchanged, only re-formatted and commented.
OPTION(Boost_USE_MULTITHREADED "Use the multithreaded versions of the Boost libraries" OFF)

# Build the list of Boost versions to probe for, honoring an exact-version
# request when one was given to find_package().
if (Boost_FIND_VERSION_EXACT)
  if (Boost_FIND_VERSION_PATCH)
    set( _boost_TEST_VERSIONS
      "${Boost_FIND_VERSION_MAJOR}.${Boost_FIND_VERSION_MINOR}.${Boost_FIND_VERSION_PATCH}")
  else (Boost_FIND_VERSION_PATCH)
    set( _boost_TEST_VERSIONS
      "${Boost_FIND_VERSION_MAJOR}.${Boost_FIND_VERSION_MINOR}.0"
      "${Boost_FIND_VERSION_MAJOR}.${Boost_FIND_VERSION_MINOR}")
  endif (Boost_FIND_VERSION_PATCH)
else (Boost_FIND_VERSION_EXACT)
  set( _boost_TEST_VERSIONS ${Boost_ADDITIONAL_VERSIONS}
    "1.43" "1.42" "1.41" "1.40" "1.39" "1.38" "1.37" )
endif (Boost_FIND_VERSION_EXACT)

# The reason that we failed to find Boost. This will be set to a
# user-friendly message when we fail to find some necessary piece of
# Boost.
set(Boost_ERROR_REASON)

############################################
#
# Check the existence of the libraries.
#
############################################
# This macro was taken directly from the FindQt4.cmake file that is included
# with the CMake distribution. This is NOT my work. All work was done by the
# original authors of the FindQt4.cmake file. Only minor modifications were
# made to remove references to Qt and make this file more generally applicable
#########################################################################

MACRO (_Boost_ADJUST_LIB_VARS basename)
  IF (Boost_INCLUDE_DIR )
    IF (Boost_${basename}_LIBRARY_DEBUG AND Boost_${basename}_LIBRARY_RELEASE)
      # if the generator supports configuration types then set
      # optimized and debug libraries, or if the CMAKE_BUILD_TYPE has a value
      IF (CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE)
        SET(Boost_${basename}_LIBRARY optimized ${Boost_${basename}_LIBRARY_RELEASE} debug ${Boost_${basename}_LIBRARY_DEBUG})
      ELSE(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE)
        # if there are no configuration types and CMAKE_BUILD_TYPE has no value
        # then just use the release libraries
        SET(Boost_${basename}_LIBRARY ${Boost_${basename}_LIBRARY_RELEASE} )
      ENDIF(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE)
      SET(Boost_${basename}_LIBRARIES optimized ${Boost_${basename}_LIBRARY_RELEASE} debug ${Boost_${basename}_LIBRARY_DEBUG})
    ENDIF (Boost_${basename}_LIBRARY_DEBUG AND Boost_${basename}_LIBRARY_RELEASE)

    # if only the release version was found, set the debug variable also to the release version
    IF (Boost_${basename}_LIBRARY_RELEASE AND NOT Boost_${basename}_LIBRARY_DEBUG)
      SET(Boost_${basename}_LIBRARY_DEBUG ${Boost_${basename}_LIBRARY_RELEASE})
      SET(Boost_${basename}_LIBRARY ${Boost_${basename}_LIBRARY_RELEASE})
      SET(Boost_${basename}_LIBRARIES ${Boost_${basename}_LIBRARY_RELEASE})
    ENDIF (Boost_${basename}_LIBRARY_RELEASE AND NOT Boost_${basename}_LIBRARY_DEBUG)

    # if only the debug version was found, set the release variable also to the debug version
    IF (Boost_${basename}_LIBRARY_DEBUG AND NOT Boost_${basename}_LIBRARY_RELEASE)
      SET(Boost_${basename}_LIBRARY_RELEASE ${Boost_${basename}_LIBRARY_DEBUG})
      SET(Boost_${basename}_LIBRARY ${Boost_${basename}_LIBRARY_DEBUG})
      SET(Boost_${basename}_LIBRARIES ${Boost_${basename}_LIBRARY_DEBUG})
    ENDIF (Boost_${basename}_LIBRARY_DEBUG AND NOT Boost_${basename}_LIBRARY_RELEASE)

    IF (Boost_${basename}_LIBRARY)
      SET(Boost_${basename}_LIBRARY ${Boost_${basename}_LIBRARY} CACHE FILEPATH "The Boost ${basename} library")
      GET_FILENAME_COMPONENT(Boost_LIBRARY_DIRS "${Boost_${basename}_LIBRARY}" PATH)
      SET(Boost_LIBRARY_DIRS ${Boost_LIBRARY_DIRS} CACHE FILEPATH "Boost library directory")
      SET(Boost_${basename}_FOUND ON CACHE INTERNAL "Whether the Boost ${basename} library found")
    ENDIF (Boost_${basename}_LIBRARY)

  ENDIF (Boost_INCLUDE_DIR )
  # Make variables changeble to the advanced user
  MARK_AS_ADVANCED(
      Boost_${basename}_LIBRARY
      Boost_${basename}_LIBRARY_RELEASE
      Boost_${basename}_LIBRARY_DEBUG
  )
ENDMACRO (_Boost_ADJUST_LIB_VARS)

#-------------------------------------------------------------------------------

# Decide whether the cached results from a previous run are still complete
# for the requested component set.
SET( _boost_IN_CACHE TRUE)
IF(Boost_INCLUDE_DIR)
  FOREACH(COMPONENT ${Boost_FIND_COMPONENTS})
    STRING(TOUPPER ${COMPONENT} COMPONENT)
    IF(NOT Boost_${COMPONENT}_FOUND)
      SET( _boost_IN_CACHE FALSE)
    ENDIF(NOT Boost_${COMPONENT}_FOUND)
  ENDFOREACH(COMPONENT)
ELSE(Boost_INCLUDE_DIR)
  SET( _boost_IN_CACHE FALSE)
ENDIF(Boost_INCLUDE_DIR)

IF (_boost_IN_CACHE)
  # in cache already
  SET(Boost_FOUND TRUE)
  FOREACH(COMPONENT ${Boost_FIND_COMPONENTS})
    STRING(TOUPPER ${COMPONENT} COMPONENT)
    _Boost_ADJUST_LIB_VARS( ${COMPONENT} )
    SET(Boost_LIBRARIES ${Boost_LIBRARIES} ${Boost_${COMPONENT}_LIBRARY})
  ENDFOREACH(COMPONENT)
  SET(Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIR})
  IF(Boost_VERSION AND NOT "${Boost_VERSION}" STREQUAL "0")
    MATH(EXPR Boost_MAJOR_VERSION "${Boost_VERSION} / 100000")
    MATH(EXPR Boost_MINOR_VERSION "${Boost_VERSION} / 100 % 1000")
    MATH(EXPR Boost_SUBMINOR_VERSION "${Boost_VERSION} % 100")
  ENDIF(Boost_VERSION AND NOT "${Boost_VERSION}" STREQUAL "0")
ELSE (_boost_IN_CACHE)
  # Need to search for boost
  IF(WIN32)
    # In windows, automatic linking is performed, so you do not have
    # to specify the libraries. If you are linking to a dynamic
    # runtime, then you can choose to link to either a static or a
    # dynamic Boost library, the default is to do a static link. You
    # can alter this for a specific library "whatever" by defining
    # BOOST_WHATEVER_DYN_LINK to force Boost library "whatever" to be
    # linked dynamically. Alternatively you can force all Boost
    # libraries to dynamic link by defining BOOST_ALL_DYN_LINK.
    # This feature can be disabled for Boost library "whatever" by
    # defining BOOST_WHATEVER_NO_LIB, or for all of Boost by defining
    # BOOST_ALL_NO_LIB.
    # If you want to observe which libraries are being linked against
    # then defining BOOST_LIB_DIAGNOSTIC will cause the auto-linking
    # code to emit a #pragma message each time a library is selected
    # for linking.
    SET(Boost_LIB_DIAGNOSTIC_DEFINITIONS "-DBOOST_LIB_DIAGNOSTIC" CACHE STRING "Boost diagnostic define")
  ENDIF(WIN32)

  SET(_boost_INCLUDE_SEARCH_DIRS
    C:/boost/include
    "C:/boost"
    "$ENV{ProgramFiles}/boost/boost_${Boost_FIND_VERSION_MAJOR}_${Boost_FIND_VERSION_MINOR}_${Boost_FIND_VERSION_PATCH}"
    "$ENV{ProgramFiles}/Boost"
    /sw/local/include
  )

  SET(_boost_LIBRARIES_SEARCH_DIRS
    C:/boost/lib
    "C:/boost"
    "$ENV{ProgramFiles}/boost/boost_${Boost_FIND_VERSION_MAJOR}_${Boost_FIND_VERSION_MINOR}_${Boost_FIND_VERSION_PATCH}/lib"
    "$ENV{ProgramFiles}/Boost"
    /sw/local/lib
  )

  # If BOOST_ROOT was defined in the environment, use it.
  if (NOT BOOST_ROOT AND NOT $ENV{BOOST_ROOT} STREQUAL "")
    set(BOOST_ROOT $ENV{BOOST_ROOT})
  endif(NOT BOOST_ROOT AND NOT $ENV{BOOST_ROOT} STREQUAL "")

  # If BOOSTROOT was defined in the environment, use it.
  if (NOT BOOST_ROOT AND NOT $ENV{BOOSTROOT} STREQUAL "")
    set(BOOST_ROOT $ENV{BOOSTROOT})
  endif(NOT BOOST_ROOT AND NOT $ENV{BOOSTROOT} STREQUAL "")

  # If BOOST_INCLUDEDIR was defined in the environment, use it.
  IF( NOT $ENV{BOOST_INCLUDEDIR} STREQUAL "" )
    set(BOOST_INCLUDEDIR $ENV{BOOST_INCLUDEDIR})
  ENDIF( NOT $ENV{BOOST_INCLUDEDIR} STREQUAL "" )

  # If BOOST_LIBRARYDIR was defined in the environment, use it.
  IF( NOT $ENV{BOOST_LIBRARYDIR} STREQUAL "" )
    set(BOOST_LIBRARYDIR $ENV{BOOST_LIBRARYDIR})
  ENDIF( NOT $ENV{BOOST_LIBRARYDIR} STREQUAL "" )

  IF( BOOST_ROOT )
    file(TO_CMAKE_PATH ${BOOST_ROOT} BOOST_ROOT)
    SET(_boost_INCLUDE_SEARCH_DIRS ${BOOST_ROOT}/include ${BOOST_ROOT} ${_boost_INCLUDE_SEARCH_DIRS})
    SET(_boost_LIBRARIES_SEARCH_DIRS ${BOOST_ROOT}/lib ${BOOST_ROOT}/stage/lib ${_boost_LIBRARIES_SEARCH_DIRS})
  ENDIF( BOOST_ROOT )

  IF( BOOST_INCLUDEDIR )
    file(TO_CMAKE_PATH ${BOOST_INCLUDEDIR} BOOST_INCLUDEDIR)
    SET(_boost_INCLUDE_SEARCH_DIRS ${BOOST_INCLUDEDIR} ${_boost_INCLUDE_SEARCH_DIRS})
  ENDIF( BOOST_INCLUDEDIR )

  IF( BOOST_LIBRARYDIR )
    file(TO_CMAKE_PATH ${BOOST_LIBRARYDIR} BOOST_LIBRARYDIR)
    SET(_boost_LIBRARIES_SEARCH_DIRS ${BOOST_LIBRARYDIR} ${_boost_LIBRARIES_SEARCH_DIRS})
  ENDIF( BOOST_LIBRARYDIR )

  # Try to find Boost by stepping backwards through the Boost versions
  # we know about.
  IF( NOT Boost_INCLUDE_DIR )
    # Build a list of path suffixes for each version.
    SET(_boost_PATH_SUFFIXES)
    FOREACH(_boost_VER ${_boost_TEST_VERSIONS})
      # Add in a path suffix, based on the required version, ideally
      # we could read this from version.hpp, but for that to work we'd
      # need to know the include dir already
      if (WIN32 AND NOT CYGWIN)
        set(_boost_PATH_SUFFIX boost_${_boost_VER})
      else (WIN32 AND NOT CYGWIN)
        set(_boost_PATH_SUFFIX boost-${_boost_VER})
      endif (WIN32 AND NOT CYGWIN)

      IF(_boost_PATH_SUFFIX MATCHES "[0-9]+\\.[0-9]+\\.[0-9]+")
        STRING(REGEX REPLACE "([0-9]+)\\.([0-9]+)\\.([0-9]+)" "\\1_\\2_\\3" _boost_PATH_SUFFIX ${_boost_PATH_SUFFIX})
      ELSEIF(_boost_PATH_SUFFIX MATCHES "[0-9]+\\.[0-9]+")
        STRING(REGEX REPLACE "([0-9]+)\\.([0-9]+)" "\\1_\\2" _boost_PATH_SUFFIX ${_boost_PATH_SUFFIX})
      ENDIF(_boost_PATH_SUFFIX MATCHES "[0-9]+\\.[0-9]+\\.[0-9]+")
      LIST(APPEND _boost_PATH_SUFFIXES "${_boost_PATH_SUFFIX}")
    ENDFOREACH(_boost_VER)

    # Look for a standard boost header file.
    FIND_PATH(Boost_INCLUDE_DIR
      NAMES boost/config.hpp
      HINTS ${_boost_INCLUDE_SEARCH_DIRS}
      PATH_SUFFIXES ${_boost_PATH_SUFFIXES}
    )
  ENDIF( NOT Boost_INCLUDE_DIR )

  IF(Boost_INCLUDE_DIR)
    # Extract Boost_VERSION and Boost_LIB_VERSION from version.hpp
    # Read the whole file:
    #
    SET(BOOST_VERSION 0)
    SET(BOOST_LIB_VERSION "")
    FILE(READ "${Boost_INCLUDE_DIR}/boost/version.hpp" _boost_VERSION_HPP_CONTENTS)

    STRING(REGEX REPLACE ".*#define BOOST_VERSION ([0-9]+).*" "\\1" Boost_VERSION "${_boost_VERSION_HPP_CONTENTS}")
    STRING(REGEX REPLACE ".*#define BOOST_LIB_VERSION \"([0-9_]+)\".*" "\\1" Boost_LIB_VERSION "${_boost_VERSION_HPP_CONTENTS}")

    SET(Boost_LIB_VERSION ${Boost_LIB_VERSION} CACHE INTERNAL "The library version string for boost libraries")
    SET(Boost_VERSION ${Boost_VERSION} CACHE INTERNAL "The version number for boost libraries")

    IF(NOT "${Boost_VERSION}" STREQUAL "0")
      MATH(EXPR Boost_MAJOR_VERSION "${Boost_VERSION} / 100000")
      MATH(EXPR Boost_MINOR_VERSION "${Boost_VERSION} / 100 % 1000")
      MATH(EXPR Boost_SUBMINOR_VERSION "${Boost_VERSION} % 100")

      set(Boost_ERROR_REASON "${Boost_ERROR_REASON}Boost version: ${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}\nBoost include path: ${Boost_INCLUDE_DIR}")
    ENDIF(NOT "${Boost_VERSION}" STREQUAL "0")
  ELSE(Boost_INCLUDE_DIR)
    set(Boost_ERROR_REASON "${Boost_ERROR_REASON}Unable to find the Boost header files. Please set BOOST_ROOT to the root directory containing Boost or BOOST_INCLUDEDIR to the directory containing Boost's headers.")
  ENDIF(Boost_INCLUDE_DIR)

  # Setting some more suffixes for the library
  SET (Boost_LIB_PREFIX "")
  # Stano: cygwin does not seem to need the lib prefix, is this MSVC-only thing?
  #  IF ( WIN32 AND Boost_USE_STATIC_LIBS )
  #    SET (Boost_LIB_PREFIX "lib")
  #  ENDIF ( WIN32 AND Boost_USE_STATIC_LIBS )
  IF ( MSVC AND Boost_USE_STATIC_LIBS )
    SET (Boost_LIB_PREFIX "lib")
  ENDIF ( MSVC AND Boost_USE_STATIC_LIBS )

  SET (_boost_COMPILER "-gcc")
  IF (MSVC90)
    SET (_boost_COMPILER "-vc90")
  ELSEIF (MSVC80)
    SET (_boost_COMPILER "-vc80")
  ELSEIF (MSVC71)
    SET (_boost_COMPILER "-vc71")
  ENDIF(MSVC90)
  IF (MINGW)
    EXEC_PROGRAM(${CMAKE_CXX_COMPILER} ARGS -dumpversion OUTPUT_VARIABLE _boost_COMPILER_VERSION )
    STRING(REGEX REPLACE "([0-9])\\.([0-9])\\.[0-9]" "\\1\\2" _boost_COMPILER_VERSION ${_boost_COMPILER_VERSION})
    SET (_boost_COMPILER "-mgw${_boost_COMPILER_VERSION}")
  ENDIF(MINGW)
  IF (UNIX)
    IF (NOT CMAKE_COMPILER_IS_GNUCC)
      # We assume that we have the Intel compiler.
      SET (_boost_COMPILER "-il")
    ELSE (NOT CMAKE_COMPILER_IS_GNUCC)
      # Determine which version of GCC we have.
      EXEC_PROGRAM(${CMAKE_CXX_COMPILER} ARGS -dumpversion OUTPUT_VARIABLE _boost_COMPILER_VERSION )
      STRING(REGEX REPLACE "([0-9])\\.([0-9])\\.[0-9]" "\\1\\2" _boost_COMPILER_VERSION ${_boost_COMPILER_VERSION})
      IF(APPLE)
        IF(Boost_MINOR_VERSION)
          IF(${Boost_MINOR_VERSION} GREATER 35)
            # In Boost 1.36.0 and newer, the mangled compiler name used
            # on Mac OS X/Darwin is "xgcc".
            SET(_boost_COMPILER "-xgcc${_boost_COMPILER_VERSION}")
          ELSE(${Boost_MINOR_VERSION} GREATER 35)
            # In Boost <= 1.35.0, there is no mangled compiler name for
            # the Mac OS X/Darwin version of GCC.
            SET(_boost_COMPILER "")
          ENDIF(${Boost_MINOR_VERSION} GREATER 35)
        ELSE(Boost_MINOR_VERSION)
          # We don't know the Boost version, so assume it's
          # pre-1.36.0.
          SET(_boost_COMPILER "")
        ENDIF(Boost_MINOR_VERSION)
      ELSE(APPLE)
        SET (_boost_COMPILER "-gcc${_boost_COMPILER_VERSION}")
      ENDIF(APPLE)
    ENDIF (NOT CMAKE_COMPILER_IS_GNUCC)
  ENDIF(UNIX)

  SET (_boost_MULTITHREADED "-mt")
  IF( NOT Boost_USE_MULTITHREADED )
    SET (_boost_MULTITHREADED "")
  ENDIF( NOT Boost_USE_MULTITHREADED )

  SET( _boost_STATIC_TAG "")
  IF (WIN32)
    IF(MSVC)
      SET (_boost_ABI_TAG "g")
    ENDIF(MSVC)
    IF( Boost_USE_STATIC_LIBS )
      SET( _boost_STATIC_TAG "-s")
    ENDIF( Boost_USE_STATIC_LIBS )
  ENDIF(WIN32)
  SET (_boost_ABI_TAG "${_boost_ABI_TAG}d")

  # ------------------------------------------------------------------------
  #  Begin finding boost libraries
  # ------------------------------------------------------------------------
  FOREACH(COMPONENT ${Boost_FIND_COMPONENTS})
    STRING(TOUPPER ${COMPONENT} UPPERCOMPONENT)
    SET( Boost_${UPPERCOMPONENT}_LIBRARY "Boost_${UPPERCOMPONENT}_LIBRARY-NOTFOUND" )
    SET( Boost_${UPPERCOMPONENT}_LIBRARY_RELEASE "Boost_${UPPERCOMPONENT}_LIBRARY_RELEASE-NOTFOUND" )
    SET( Boost_${UPPERCOMPONENT}_LIBRARY_DEBUG "Boost_${UPPERCOMPONENT}_LIBRARY_DEBUG-NOTFOUND")

    # Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES
    IF( Boost_USE_STATIC_LIBS )
      SET( _boost_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
      IF(WIN32)
        SET(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
      ELSE(WIN32)
        SET(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
      ENDIF(WIN32)
    ENDIF( Boost_USE_STATIC_LIBS )

    FIND_LIBRARY(Boost_${UPPERCOMPONENT}_LIBRARY_RELEASE
        NAMES ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_COMPILER}${_boost_MULTITHREADED}-${Boost_LIB_VERSION}
              ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_COMPILER}${_boost_MULTITHREADED}${_boost_STATIC_TAG}-${Boost_LIB_VERSION}
              ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED}
              ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED}${_boost_STATIC_TAG}
              ${Boost_LIB_PREFIX}boost_${COMPONENT}
        HINTS ${_boost_LIBRARIES_SEARCH_DIRS}
    )

    FIND_LIBRARY(Boost_${UPPERCOMPONENT}_LIBRARY_DEBUG
        NAMES ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_COMPILER}${_boost_MULTITHREADED}-${_boost_ABI_TAG}-${Boost_LIB_VERSION}
              ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_COMPILER}${_boost_MULTITHREADED}${_boost_STATIC_TAG}${_boost_ABI_TAG}-${Boost_LIB_VERSION}
              ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED}-${_boost_ABI_TAG}
              ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED}${_boost_STATIC_TAG}${_boost_ABI_TAG}
              ${Boost_LIB_PREFIX}boost_${COMPONENT}-${_boost_ABI_TAG}
        HINTS ${_boost_LIBRARIES_SEARCH_DIRS}
    )

    _Boost_ADJUST_LIB_VARS(${UPPERCOMPONENT})
    IF( Boost_USE_STATIC_LIBS )
      SET(CMAKE_FIND_LIBRARY_SUFFIXES ${_boost_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
    ENDIF( Boost_USE_STATIC_LIBS )
  ENDFOREACH(COMPONENT)
  # ------------------------------------------------------------------------
  #  End finding boost libraries
  # ------------------------------------------------------------------------

  SET(Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIR} )

  SET(Boost_FOUND FALSE)
  IF(Boost_INCLUDE_DIR)
    SET( Boost_FOUND TRUE )

    # Check the version of Boost against the requested version.
    if (Boost_FIND_VERSION AND NOT Boost_FIND_VERSION_MINOR)
      message(SEND_ERROR "When requesting a specific version of Boost, you must provide at least the major and minor version numbers, e.g., 1.34")
    endif (Boost_FIND_VERSION AND NOT Boost_FIND_VERSION_MINOR)
    if(Boost_MAJOR_VERSION LESS "${Boost_FIND_VERSION_MAJOR}" )
      set( Boost_FOUND FALSE )
      set(_Boost_VERSION_AGE "old")
    elseif(Boost_MAJOR_VERSION EQUAL "${Boost_FIND_VERSION_MAJOR}" )
      if(Boost_MINOR_VERSION LESS "${Boost_FIND_VERSION_MINOR}" )
        set( Boost_FOUND FALSE )
        set(_Boost_VERSION_AGE "old")
      elseif(Boost_MINOR_VERSION EQUAL "${Boost_FIND_VERSION_MINOR}" )
        if( Boost_FIND_VERSION_PATCH AND Boost_SUBMINOR_VERSION LESS "${Boost_FIND_VERSION_PATCH}" )
          set( Boost_FOUND FALSE )
          set(_Boost_VERSION_AGE "old")
        endif( Boost_FIND_VERSION_PATCH AND Boost_SUBMINOR_VERSION LESS "${Boost_FIND_VERSION_PATCH}" )
      endif( Boost_MINOR_VERSION LESS "${Boost_FIND_VERSION_MINOR}" )
    endif( Boost_MAJOR_VERSION LESS "${Boost_FIND_VERSION_MAJOR}" )

    if (Boost_FOUND AND Boost_FIND_VERSION_EXACT)
      # If the user requested an exact version of Boost, check
      # that. We already know that the Boost version we have is >= the
      # requested version.
      set(_Boost_VERSION_AGE "new")

      # If the user didn't specify a patchlevel, it's 0.
      if (NOT Boost_FIND_VERSION_PATCH)
        set(Boost_FIND_VERSION_PATCH 0)
      endif (NOT Boost_FIND_VERSION_PATCH)

      # We'll set Boost_FOUND true again if we have an exact version match.
      set(Boost_FOUND FALSE)
      if(Boost_MAJOR_VERSION EQUAL "${Boost_FIND_VERSION_MAJOR}" )
        if(Boost_MINOR_VERSION EQUAL "${Boost_FIND_VERSION_MINOR}" )
          if(Boost_SUBMINOR_VERSION EQUAL "${Boost_FIND_VERSION_PATCH}" )
            set( Boost_FOUND TRUE )
          endif(Boost_SUBMINOR_VERSION EQUAL "${Boost_FIND_VERSION_PATCH}" )
        endif( Boost_MINOR_VERSION EQUAL "${Boost_FIND_VERSION_MINOR}" )
      endif( Boost_MAJOR_VERSION EQUAL "${Boost_FIND_VERSION_MAJOR}" )
    endif (Boost_FOUND AND Boost_FIND_VERSION_EXACT)

    if(NOT Boost_FOUND)
      # State that we found a version of Boost that is too new or too old.
      set(Boost_ERROR_REASON "${Boost_ERROR_REASON}\nDetected version of Boost is too ${_Boost_VERSION_AGE}. Requested version was ${Boost_FIND_VERSION_MAJOR}.${Boost_FIND_VERSION_MINOR}")
      if (Boost_FIND_VERSION_PATCH)
        set(Boost_ERROR_REASON "${Boost_ERROR_REASON}.${Boost_FIND_VERSION_PATCH}")
      endif (Boost_FIND_VERSION_PATCH)
      if (NOT Boost_FIND_VERSION_EXACT)
        set(Boost_ERROR_REASON "${Boost_ERROR_REASON} (or newer)")
      endif (NOT Boost_FIND_VERSION_EXACT)
      set(Boost_ERROR_REASON "${Boost_ERROR_REASON}.")
    endif (NOT Boost_FOUND)

    if (Boost_FOUND)
      set(_boost_CHECKED_COMPONENT FALSE)
      set(_Boost_MISSING_COMPONENTS)
      foreach(COMPONENT ${Boost_FIND_COMPONENTS})
        string(TOUPPER ${COMPONENT} COMPONENT)
        set(_boost_CHECKED_COMPONENT TRUE)
        if(NOT Boost_${COMPONENT}_FOUND)
          string(TOLOWER ${COMPONENT} COMPONENT)
          list(APPEND _Boost_MISSING_COMPONENTS ${COMPONENT})
          set( Boost_FOUND FALSE)
        endif(NOT Boost_${COMPONENT}_FOUND)
      endforeach(COMPONENT)
    endif (Boost_FOUND)

    if (_Boost_MISSING_COMPONENTS)
      # We were unable to find some libraries, so generate a sensible
      # error message that lists the libraries we were unable to find.
      set(Boost_ERROR_REASON "${Boost_ERROR_REASON}\nThe following Boost libraries could not be found:\n")
      foreach(COMPONENT ${_Boost_MISSING_COMPONENTS})
        set(Boost_ERROR_REASON "${Boost_ERROR_REASON} boost_${COMPONENT}\n")
      endforeach(COMPONENT)

      list(LENGTH Boost_FIND_COMPONENTS Boost_NUM_COMPONENTS_WANTED)
      list(LENGTH _Boost_MISSING_COMPONENTS Boost_NUM_MISSING_COMPONENTS)
      if (${Boost_NUM_COMPONENTS_WANTED} EQUAL ${Boost_NUM_MISSING_COMPONENTS})
        set(Boost_ERROR_REASON "${Boost_ERROR_REASON}No Boost libraries were found. You may need to set Boost_LIBRARYDIR to the directory containing Boost libraries or BOOST_ROOT to the location of Boost.")
      else (${Boost_NUM_COMPONENTS_WANTED} EQUAL ${Boost_NUM_MISSING_COMPONENTS})
        set(Boost_ERROR_REASON "${Boost_ERROR_REASON}Some (but not all) of the required Boost libraries were found. You may need to install these additional Boost libraries. Alternatively, set Boost_LIBRARYDIR to the directory containing Boost libraries or BOOST_ROOT to the location of Boost.")
      endif (${Boost_NUM_COMPONENTS_WANTED} EQUAL ${Boost_NUM_MISSING_COMPONENTS})
    endif (_Boost_MISSING_COMPONENTS)

    IF( NOT Boost_LIBRARY_DIRS AND NOT _boost_CHECKED_COMPONENT )
      # Compatibility Code for backwards compatibility with CMake
      # 2.4's FindBoost module.

      # Look for the boost library path.
      # Note that the user may not have installed any libraries
      # so it is quite possible the Boost_LIBRARY_PATH may not exist.
      SET(_boost_LIB_DIR ${Boost_INCLUDE_DIR})

      IF("${_boost_LIB_DIR}" MATCHES "boost-[0-9]+")
        GET_FILENAME_COMPONENT(_boost_LIB_DIR ${_boost_LIB_DIR} PATH)
      ENDIF ("${_boost_LIB_DIR}" MATCHES "boost-[0-9]+")

      IF("${_boost_LIB_DIR}" MATCHES "/include$")
        # Strip off the trailing "/include" in the path.
        GET_FILENAME_COMPONENT(_boost_LIB_DIR ${_boost_LIB_DIR} PATH)
      ENDIF("${_boost_LIB_DIR}" MATCHES "/include$")

      IF(EXISTS "${_boost_LIB_DIR}/lib")
        SET (_boost_LIB_DIR ${_boost_LIB_DIR}/lib)
      ELSE(EXISTS "${_boost_LIB_DIR}/lib")
        IF(EXISTS "${_boost_LIB_DIR}/stage/lib")
          SET(_boost_LIB_DIR ${_boost_LIB_DIR}/stage/lib)
        ELSE(EXISTS "${_boost_LIB_DIR}/stage/lib")
          SET(_boost_LIB_DIR "")
        ENDIF(EXISTS "${_boost_LIB_DIR}/stage/lib")
      ENDIF(EXISTS "${_boost_LIB_DIR}/lib")

      IF(_boost_LIB_DIR AND EXISTS "${_boost_LIB_DIR}")
        SET(Boost_LIBRARY_DIRS ${_boost_LIB_DIR} CACHE FILEPATH "Boost library directory")
      ENDIF(_boost_LIB_DIR AND EXISTS "${_boost_LIB_DIR}")

    ENDIF( NOT Boost_LIBRARY_DIRS AND NOT _boost_CHECKED_COMPONENT )

  ELSE(Boost_INCLUDE_DIR)
    SET( Boost_FOUND FALSE)
  ENDIF(Boost_INCLUDE_DIR)

  IF (Boost_FOUND)
    IF (NOT Boost_FIND_QUIETLY)
      MESSAGE(STATUS "Boost version: ${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}")
    ENDIF(NOT Boost_FIND_QUIETLY)
    IF (NOT Boost_FIND_QUIETLY)
      MESSAGE(STATUS "Found the following Boost libraries:")
    ENDIF(NOT Boost_FIND_QUIETLY)
    FOREACH ( COMPONENT ${Boost_FIND_COMPONENTS} )
      STRING( TOUPPER ${COMPONENT} UPPERCOMPONENT )
      IF ( Boost_${UPPERCOMPONENT}_FOUND )
        IF (NOT Boost_FIND_QUIETLY)
          MESSAGE (STATUS " ${COMPONENT}")
        ENDIF(NOT Boost_FIND_QUIETLY)
        SET(Boost_LIBRARIES ${Boost_LIBRARIES} ${Boost_${UPPERCOMPONENT}_LIBRARY})
      ENDIF ( Boost_${UPPERCOMPONENT}_FOUND )
    ENDFOREACH(COMPONENT)
  ELSE (Boost_FOUND)
    IF (Boost_FIND_REQUIRED)
      message(SEND_ERROR "Unable to find the requested Boost libraries.\n${Boost_ERROR_REASON}")
    ENDIF(Boost_FIND_REQUIRED)
  ENDIF(Boost_FOUND)

  # Under Windows, automatic linking is performed, so no need to specify the libraries.
  IF (WIN32)
    IF (NOT MINGW)
      SET(Boost_LIBRARIES "")
    ENDIF (NOT MINGW)
  ENDIF(WIN32)

  # show the Boost_INCLUDE_DIRS AND Boost_LIBRARIES variables only in the advanced view
  MARK_AS_ADVANCED(Boost_INCLUDE_DIR
      Boost_INCLUDE_DIRS
      Boost_LIBRARY_DIRS
      Boost_USE_MULTITHREADED
  )

ENDIF(_boost_IN_CACHE)



================================================
FILE: cmake/FindCUDA/make2cmake.cmake
================================================
#  James Bigler, NVIDIA Corp (nvidia.com - jbigler)
#  Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
#
#  Copyright (c) 2008 - 2009 NVIDIA Corporation.  All rights reserved.
#
#  Copyright (c) 2007-2009
#  Scientific Computing and Imaging Institute, University of Utah
#
#  This code is licensed under the MIT License.  See the FindCUDA.cmake script
#  for the text of the license.

# The MIT License
#
# License for the specific language governing rights and limitations under
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# #######################################################################
# This converts a file written in makefile syntax into one that can be included
# by CMake.
#
# Inputs (passed with -D on the cmake command line):
#   input_file  - dependency file generated by `nvcc -M`
#   output_file - destination for the CMake-readable SET(CUDA_NVCC_DEPEND ...)

file(READ ${input_file} depend_text)

# Initialize the list before the conditional below: list(REMOVE_DUPLICATES)
# and list(SORT) fail on an undefined variable, which used to happen whenever
# the dependency file contained no entries.
set(dependency_list "")

# Quote the expansion so an empty dependency file does not collapse the
# condition into the malformed `if(MATCHES ".+")`.
if ("${depend_text}" MATCHES ".+")

  # message("FOUND DEPENDS")

  # Remember, four backslashes is escaped to one backslash in the string.
  string(REGEX REPLACE "\\\\ " " " depend_text ${depend_text})

  # This works for the nvcc -M generated dependency files.
  string(REGEX REPLACE "^.* : " "" depend_text ${depend_text})
  string(REGEX REPLACE "[ \\\\]*\n" ";" depend_text ${depend_text})

  foreach(file ${depend_text})

    # Strip leading whitespace left over from makefile line continuations.
    string(REGEX REPLACE "^ +" "" file ${file})

    if(NOT IS_DIRECTORY ${file})
      # If softlinks start to matter, we should change this to REALPATH.  For now we need
      # to flatten paths, because nvcc can generate stuff like /bin/../include instead of
      # just /include.
      get_filename_component(file_absolute "${file}" ABSOLUTE)
      list(APPEND dependency_list "${file_absolute}")
    endif(NOT IS_DIRECTORY ${file})

  endforeach(file)

else()
  # message("FOUND NO DEPENDS")
endif()

# Remove the duplicate entries and sort them.  Guarded because both
# subcommands error out when handed an empty or undefined list in the CMake
# versions this module targets.
if(dependency_list)
  list(REMOVE_DUPLICATES dependency_list)
  list(SORT dependency_list)
endif()

foreach(file ${dependency_list})
  set(cuda_nvcc_depend "${cuda_nvcc_depend} \"${file}\"\n")
endforeach()

file(WRITE ${output_file} "# Generated by: make2cmake.cmake\nSET(CUDA_NVCC_DEPEND\n ${cuda_nvcc_depend})\n\n")


================================================
FILE: cmake/FindCUDA/parse_cubin.cmake
================================================
# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
#
# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
#
# Copyright (c) 2007-2009
# Scientific Computing and Imaging Institute, University of Utah
#
# This code is licensed under the MIT License. See the FindCUDA.cmake script
# for the text of the license.
# The MIT License
#
# License for the specific language governing rights and limitations under
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
# #######################################################################
# Parses a .cubin file produced by nvcc and reports statistics about the file.
#
# Input (passed with -D on the cmake command line):
#   input_file - path to the .cubin file to parse
#
# All output goes through message(); nothing is written to disk.

file(READ ${input_file} file_text)

if (${file_text} MATCHES ".+")

  # Remember, four backslashes is escaped to one backslash in the string.
  # Escape literal semicolons so they survive list handling, then turn every
  # "\ncode" boundary into a list separator so each list element is one
  # "code { ... }" block from the cubin file.
  string(REGEX REPLACE ";" "\\\\;" file_text ${file_text})
  string(REGEX REPLACE "\ncode" ";code" file_text ${file_text})

  list(LENGTH file_text len)

  foreach(line ${file_text})

    # Only look at "code { }" blocks.
    if(line MATCHES "^code")

      # Break into individual lines.
      string(REGEX REPLACE "\n" ";" line ${line})

      foreach(entry ${line})

        # Extract kernel names.
        # NOTE(review): the [^g] guard presumably keeps this from matching
        # other "...gname"-style fields in the cubin output -- confirm
        # against real nvcc output before changing the pattern.
        if (${entry} MATCHES "[^g]name = ([^ ]+)")
          string(REGEX REPLACE ".* = ([^ ]+)" "\\1" entry ${entry})

          # Check to see if the kernel name starts with "_"
          set(skip FALSE)
          # if (${entry} MATCHES "^_")
            # Skip the rest of this block.
            # message("Skipping ${entry}")
            # set(skip TRUE)
          # else (${entry} MATCHES "^_")
            message("Kernel: ${entry}")
          # endif (${entry} MATCHES "^_")

        endif(${entry} MATCHES "[^g]name = ([^ ]+)")

        # Skip the rest of the block if necessary
        if(NOT skip)

          # Registers
          if (${entry} MATCHES "reg([ ]+)=([ ]+)([^ ]+)")
            string(REGEX REPLACE ".*([ ]+)=([ ]+)([^ ]+)" "\\3" entry ${entry})
            message("Registers: ${entry}")
          endif()

          # Local memory
          if (${entry} MATCHES "lmem([ ]+)=([ ]+)([^ ]+)")
            string(REGEX REPLACE ".*([ ]+)=([ ]+)([^ ]+)" "\\3" entry ${entry})
            message("Local: ${entry}")
          endif()

          # Shared memory
          if (${entry} MATCHES "smem([ ]+)=([ ]+)([^ ]+)")
            string(REGEX REPLACE ".*([ ]+)=([ ]+)([^ ]+)" "\\3" entry ${entry})
            message("Shared: ${entry}")
          endif()

          # A closing brace ends the current code block; emit a blank line
          # to separate one kernel's report from the next.
          if (${entry} MATCHES "^}")
            message("")
          endif()

        endif(NOT skip)

      endforeach(entry)

    endif(line MATCHES "^code")

  endforeach(line)

else()
  # message("FOUND NO DEPENDS")
endif()


================================================
FILE: cmake/FindCUDA/run_nvcc.cmake
================================================
# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
#
# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
#
# This code is licensed under the MIT License. See the FindCUDA.cmake script
# for the text of the license.
# The MIT License # # License for the specific language governing rights and limitations under # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. ########################################################################## # This file runs the nvcc commands to produce the desired output file along with # the dependency file needed by CMake to compute dependencies. In addition the # file checks the output of each command and if the command fails it deletes the # output files. # Input variables # # verbose:BOOL=<> OFF: Be as quiet as possible (default) # ON : Describe each step # # build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or # RelWithDebInfo, but it should match one of the # entries in CUDA_HOST_FLAGS. This is the build # configuration used when compiling the code. If # blank or unspecified Debug is assumed as this is # what CMake does. # # generated_file:STRING=<> File to generate. This argument must be passed in. 
#
# generated_cubin_file:STRING=<> File to generate. This argument must be passed
# in if build_cubin is true.

# NOTE(review): this script is a template -- the @...@ tokens below are
# substituted by FindCUDA.cmake (presumably via configure_file) before the
# script is invoked at build time with `cmake -P`.

if(NOT generated_file)
  message(FATAL_ERROR "You must specify generated_file on the command line")
endif()

# Set these up as variables to make reading the generated file easier
set(CMAKE_COMMAND "@CMAKE_COMMAND@")
set(source_file "@source_file@")
set(NVCC_generated_dependency_file "@NVCC_generated_dependency_file@")
set(cmake_dependency_file "@cmake_dependency_file@")
set(CUDA_make2cmake "@CUDA_make2cmake@")
set(CUDA_parse_cubin "@CUDA_parse_cubin@")
set(build_cubin @build_cubin@)
# We won't actually use these variables for now, but we need to set this, in
# order to force this file to be run again if it changes.
set(generated_file_path "@generated_file_path@")
set(generated_file_internal "@generated_file@")
set(generated_cubin_file_internal "@generated_cubin_file@")

set(CUDA_NVCC_EXECUTABLE "@CUDA_NVCC_EXECUTABLE@")
set(CUDA_NVCC_FLAGS "@CUDA_NVCC_FLAGS@;;@CUDA_WRAP_OPTION_NVCC_FLAGS@")
@CUDA_NVCC_FLAGS_CONFIG@
set(nvcc_flags "@nvcc_flags@")
set(CUDA_NVCC_INCLUDE_ARGS "@CUDA_NVCC_INCLUDE_ARGS@")
set(format_flag "@format_flag@")

if(build_cubin AND NOT generated_cubin_file)
  message(FATAL_ERROR "You must specify generated_cubin_file on the command line")
endif()

# This is the list of host compilation flags. If C or CXX should already have
# been chosen by FindCUDA.cmake.
@CUDA_HOST_FLAGS@

# Take the compiler flags and package them up to be sent to the compiler via -Xcompiler
set(nvcc_host_compiler_flags "")
# If we weren't given a build_configuration, use Debug.
if(NOT build_configuration)
  set(build_configuration Debug)
endif()
string(TOUPPER "${build_configuration}" build_configuration)
#message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}")
foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}})
  # Extra quotes are added around each flag to help nvcc parse out flags with spaces.
  set(nvcc_host_compiler_flags "${nvcc_host_compiler_flags},\"${flag}\"")
endforeach()
if (nvcc_host_compiler_flags)
  set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags})
endif()
#message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"")
# Add the build specific configuration flags
list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}})

# Forward a host-compiler override to nvcc when one was configured.
if(DEFINED CCBIN)
  set(CCBIN -ccbin "${CCBIN}")
endif()

# cuda_execute_process - Executes a command with optional command echo and status message.
#
#   status  - Status message to print if verbose is true
#   command - COMMAND argument from the usual execute_process argument structure
#   ARGN    - Remaining arguments are the command with arguments
#
#   CUDA_result - return value from running the command
#
# Make this a macro instead of a function, so that things like RESULT_VARIABLE
# and other return variables are present after executing the process.
macro(cuda_execute_process status command)
  set(_command ${command})
  if(NOT _command STREQUAL "COMMAND")
    message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})")
  endif()
  if(verbose)
    execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status})
    # Now we need to build up our command string. We are accounting for quotes
    # and spaces, anything else is left up to the user to fix if they want to
    # copy and paste a runnable command line.
    set(cuda_execute_process_string)
    foreach(arg ${ARGN})
      # If there are quotes, escape them, so they come through.
      string(REPLACE "\"" "\\\"" arg ${arg})
      # Args with spaces need quotes around them to get them to be parsed as a single argument.
      if(arg MATCHES " ")
        list(APPEND cuda_execute_process_string "\"${arg}\"")
      else()
        list(APPEND cuda_execute_process_string ${arg})
      endif()
    endforeach()
    # Echo the command
    execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string})
  endif(verbose)
  # Run the command
  execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result )
endmacro()

# Delete the target file first so a stale output never survives a failed build.
cuda_execute_process(
  "Removing ${generated_file}"
  COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}"
  )

# For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag
# for dependency generation and hope for the best.
set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
set(CUDA_VERSION @CUDA_VERSION@)
if(CUDA_VERSION VERSION_LESS "3.0")
  cmake_policy(PUSH)
  # CMake policy 0007 NEW states that empty list elements are not
  # ignored. I'm just setting it to avoid the warning that's printed.
  cmake_policy(SET CMP0007 NEW)
  # Note that this will remove all occurrences of -G.
  list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G")
  cmake_policy(POP)
endif()

# nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This
# can cause incorrect dependencies when #including files based on this macro which is
# defined in the generating passes of nvcc invocation. We will go ahead and manually
# define this for now until a future version fixes this bug.
set(CUDACC_DEFINE -D__CUDACC__)

# Generate the dependency file (makefile syntax, via `nvcc -M`).
cuda_execute_process(
  "Generating dependency file: ${NVCC_generated_dependency_file}"
  COMMAND "${CUDA_NVCC_EXECUTABLE}"
  -M
  ${CUDACC_DEFINE}
  "${source_file}"
  -o "${NVCC_generated_dependency_file}"
  ${CCBIN}
  ${nvcc_flags}
  ${nvcc_host_compiler_flags}
  ${depends_CUDA_NVCC_FLAGS}
  -DNVCC
  ${CUDA_NVCC_INCLUDE_ARGS}
  )

if(CUDA_result)
  message(FATAL_ERROR "Error generating ${generated_file}")
endif()

# Generate the cmake readable dependency file to a temp file. Don't put the
# quotes just around the filenames for the input_file and output_file variables.
# CMake will pass the quotes through and not be able to find the file.
cuda_execute_process(
  "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp"
  COMMAND "${CMAKE_COMMAND}"
  -D "input_file:FILEPATH=${NVCC_generated_dependency_file}"
  -D "output_file:FILEPATH=${cmake_dependency_file}.tmp"
  -P "${CUDA_make2cmake}"
  )

if(CUDA_result)
  message(FATAL_ERROR "Error generating ${generated_file}")
endif()

# Copy the file if it is different (avoids touching the timestamp, which
# would needlessly retrigger dependent build steps).
cuda_execute_process(
  "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}"
  COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}"
  )

if(CUDA_result)
  message(FATAL_ERROR "Error generating ${generated_file}")
endif()

# Delete the temporary file
cuda_execute_process(
  "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}"
  COMMAND "${CMAKE_COMMAND}" -E remove "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}"
  )

if(CUDA_result)
  message(FATAL_ERROR "Error generating ${generated_file}")
endif()

# Generate the code (the real compilation pass).
cuda_execute_process(
  "Generating ${generated_file}"
  COMMAND "${CUDA_NVCC_EXECUTABLE}"
  "${source_file}"
  ${format_flag} -o "${generated_file}"
  ${CCBIN}
  ${nvcc_flags}
  ${nvcc_host_compiler_flags}
  ${CUDA_NVCC_FLAGS}
  -DNVCC
  ${CUDA_NVCC_INCLUDE_ARGS}
  )

if(CUDA_result)
  # Since nvcc can sometimes leave half done files make sure that we delete the output file.
  cuda_execute_process(
    "Removing ${generated_file}"
    COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}"
    )
  message(FATAL_ERROR "Error generating file ${generated_file}")
else()
  if(verbose)
    message("Generated ${generated_file} successfully.")
  endif()
endif()

# Cubin resource report commands.
if( build_cubin )
  # Run with -cubin to produce resource usage report.
cuda_execute_process( "Generating ${generated_cubin_file}" COMMAND "${CUDA_NVCC_EXECUTABLE}" "${source_file}" ${CUDA_NVCC_FLAGS} ${nvcc_flags} ${CCBIN} ${nvcc_host_compiler_flags} -DNVCC -cubin -o "${generated_cubin_file}" ${CUDA_NVCC_INCLUDE_ARGS} ) # Execute the parser script. cuda_execute_process( "Executing the parser script" COMMAND "${CMAKE_COMMAND}" -D "input_file:STRING=${generated_cubin_file}" -P "${CUDA_parse_cubin}" ) endif( build_cubin ) ================================================ FILE: cmake/FindCUDA.cmake ================================================ # - Tools for building CUDA C files: libraries and build dependencies. # This script locates the NVIDIA CUDA C tools. It should work on linux, windows, # and mac and should be reasonably up to date with CUDA C releases. # # This script makes use of the standard find_package arguments of , # REQUIRED and QUIET. CUDA_FOUND will report if an acceptable version of CUDA # was found. # # The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if the prefix # cannot be determined by the location of nvcc in the system path and REQUIRED # is specified to find_package(). To use a different installed version of the # toolkit set the environment variable CUDA_BIN_PATH before running cmake # (e.g. CUDA_BIN_PATH=/usr/local/cuda1.0 instead of the default /usr/local/cuda) # or set CUDA_TOOLKIT_ROOT_DIR after configuring. If you change the value of # CUDA_TOOLKIT_ROOT_DIR, various components that depend on the path will be # relocated. # # It might be necessary to set CUDA_TOOLKIT_ROOT_DIR manually on certain # platforms, or to use a cuda runtime not installed in the default location. In # newer versions of the toolkit the cuda library is included with the graphics # driver- be sure that the driver version matches what is needed by the cuda # runtime version. # # The following variables affect the behavior of the macros in the script (in # alphebetical order). 
Note that any of these flags can be changed multiple # times in the same directory before calling CUDA_ADD_EXECUTABLE, # CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX or CUDA_WRAP_SRCS. # # CUDA_64_BIT_DEVICE_CODE (Default matches host bit size) # -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code. # Note that making this different from the host code when generating object # or C files from CUDA code just won't work, because size_t gets defined by # nvcc in the generated source. If you compile to PTX and then load the # file yourself, you can mix bit sizes between device and host. # # CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON) # -- Set to ON if you want the custom build rule to be attached to the source # file in Visual Studio. Turn OFF if you add the same cuda file to multiple # targets. # # This allows the user to build the target from the CUDA file; however, bad # things can happen if the CUDA source file is added to multiple targets. # When performing parallel builds it is possible for the custom build # command to be run more than once and in parallel causing cryptic build # errors. VS runs the rules for every source file in the target, and a # source can have only one rule no matter how many projects it is added to. # When the rule is run from multiple targets race conditions can occur on # the generated file. Eventually everything will get built, but if the user # is unaware of this behavior, there may be confusion. It would be nice if # this script could detect the reuse of source files across multiple targets # and turn the option off for the user, but no good solution could be found. # # CUDA_BUILD_CUBIN (Default OFF) # -- Set to ON to enable and extra compilation pass with the -cubin option in # Device mode. The output is parsed and register, shared memory usage is # printed during build. # # CUDA_BUILD_EMULATION (Default OFF for device mode) # -- Set to ON for Emulation mode. 
-D_DEVICEEMU is defined for CUDA C files # when CUDA_BUILD_EMULATION is TRUE. # # CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR) # -- Set to the path you wish to have the generated files placed. If it is # blank output files will be placed in CMAKE_CURRENT_BINARY_DIR. # Intermediate files will always be placed in # CMAKE_CURRENT_BINARY_DIR/CMakeFiles. # # CUDA_HOST_COMPILATION_CPP (Default ON) # -- Set to OFF for C compilation of host code. # # CUDA_NVCC_FLAGS # CUDA_NVCC_FLAGS_ # -- Additional NVCC command line arguments. NOTE: multiple arguments must be # semi-colon delimited (e.g. --compiler-options;-Wall) # # CUDA_PROPAGATE_HOST_FLAGS (Default ON) # -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration # dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the # host compiler through nvcc's -Xcompiler flag. This helps make the # generated host code match the rest of the system better. Sometimes # certain flags give nvcc problems, and this will help you turn the flag # propagation off. This does not affect the flags supplied directly to nvcc # via CUDA_NVCC_FLAGS or through the OPTION flags specified through # CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS. Flags used for # shared library compilation are not affected by this flag. # # CUDA_VERBOSE_BUILD (Default OFF) # -- Set to ON to see all the commands used when building the CUDA file. When # using a Makefile generator the value defaults to VERBOSE (run make # VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will # always print the output. # # The script creates the following macros (in alphebetical order): # # CUDA_ADD_CUFFT_TO_TARGET( cuda_target ) # -- Adds the cufft library to the target (can be any target). Handles whether # you are in emulation mode or not. # # CUDA_ADD_CUBLAS_TO_TARGET( cuda_target ) # -- Adds the cublas library to the target (can be any target). Handles # whether you are in emulation mode or not. 
# # CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ... # [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) # -- Creates an executable "cuda_target" which is made up of the files # specified. All of the non CUDA C files are compiled using the standard # build rules specified by CMAKE and the cuda files are compiled to object # files using nvcc and the host compiler. In addition CUDA_INCLUDE_DIRS is # added automatically to include_directories(). Some standard CMake target # calls can be used on the target after calling this macro # (e.g. set_target_properties and target_link_libraries), but setting # properties that adjust compilation flags will not affect code compiled by # nvcc. Such flags should be modified before calling CUDA_ADD_EXECUTABLE, # CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS. # # CUDA_ADD_LIBRARY( cuda_target file0 file1 ... # [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) # -- Same as CUDA_ADD_EXECUTABLE except that a library is created. # # CUDA_BUILD_CLEAN_TARGET() # -- Creates a convience target that deletes all the dependency files # generated. You should make clean after running this target to ensure the # dependency files get regenerated. # # CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE] # [OPTIONS ...] ) # -- Returns a list of generated files from the input source files to be used # with ADD_LIBRARY or ADD_EXECUTABLE. # # CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] ) # -- Returns a list of PTX files generated from the input source files. # # CUDA_INCLUDE_DIRECTORIES( path0 path1 ... ) # -- Sets the directories that should be passed to nvcc # (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu # files. # # CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ... # [STATIC | SHARED | MODULE] [OPTIONS ...] ) # -- This is where all the magic happens. 
CUDA_ADD_EXECUTABLE, # CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this # function under the hood. # # Given the list of files (file0 file1 ... fileN) this macro generates # custom commands that generate either PTX or linkable objects (use "PTX" or # "OBJ" for the format argument to switch). Files that don't end with .cu # or have the HEADER_FILE_ONLY property are ignored. # # The arguments passed in after OPTIONS are extra command line options to # give to nvcc. You can also specify per configuration options by # specifying the name of the configuration followed by the options. General # options must preceed configuration specific options. Not all # configurations need to be specified, only the ones provided will be used. # # OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag" # DEBUG -g # RELEASE --use_fast_math # RELWITHDEBINFO --use_fast_math;-g # MINSIZEREL --use_fast_math # # For certain configurations (namely VS generating object files with # CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will # be produced for the given cuda file. This is because when you add the # cuda file to Visual Studio it knows that this file produces an object file # and will link in the resulting object file automatically. # # This script will also generate a separate cmake script that is used at # build time to invoke nvcc. This is for serveral reasons. # # 1. nvcc can return negative numbers as return values which confuses # Visual Studio into thinking that the command succeeded. The script now # checks the error codes and produces errors when there was a problem. # # 2. nvcc has been known to not delete incomplete results when it # encounters problems. This confuses build systems into thinking the # target was generated when in fact an unusable file exists. The script # now deletes the output files if there was an error. # # 3. 
By putting all the options that affect the build into a file and then # make the build rule dependent on the file, the output files will be # regenerated when the options change. # # This script also looks at optional arguments STATIC, SHARED, or MODULE to # determine when to target the object compilation for a shared library. # BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in # CUDA_ADD_LIBRARY. On some systems special flags are added for building # objects intended for shared libraries. A preprocessor macro, # _EXPORTS is defined when a shared library compilation is # detected. # # Flags passed into add_definitions with -D or /D are passed along to nvcc. # # The script defines the following variables: # # CUDA_VERSION_MAJOR -- The major version of cuda as reported by nvcc. # CUDA_VERSION_MINOR -- The minor version. # CUDA_VERSION # CUDA_VERSION_STRING -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR # # CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set). # CUDA_SDK_ROOT_DIR -- Path to the CUDA SDK. Use this to find files in the # SDK. This script will not directly support finding # specific libraries or headers, as that isn't # supported by NVIDIA. If you want to change # libraries when the path changes see the # FindCUDA.cmake script for an example of how to clear # these variables. There are also examples of how to # use the CUDA_SDK_ROOT_DIR to locate headers or # libraries, if you so choose (at your own risk). # CUDA_INCLUDE_DIRS -- Include directory for cuda headers. Added automatically # for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY. # CUDA_LIBRARIES -- Cuda RT library. # CUDA_CUFFT_LIBRARIES -- Device or emulation library for the Cuda FFT # implementation (alternative to: # CUDA_ADD_CUFFT_TO_TARGET macro) # CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS # implementation (alterative to: # CUDA_ADD_CUBLAS_TO_TARGET macro). 
# # # James Bigler, NVIDIA Corp (nvidia.com - jbigler) # Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html # # Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. # # Copyright (c) 2007-2009 # Scientific Computing and Imaging Institute, University of Utah # # This code is licensed under the MIT License. See the FindCUDA.cmake script # for the text of the license. # The MIT License # # License for the specific language governing rights and limitations under # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. 
# ###############################################################################
# FindCUDA.cmake
# We need to have at least this version to support the VERSION_LESS argument to
# 'if' (2.6.2) and unset (2.6.3)
cmake_policy(PUSH)
cmake_minimum_required(VERSION 2.6.2)
cmake_policy(POP)

# This macro helps us find the location of helper files we will need the full path to
macro(CUDA_FIND_HELPER_FILE _name _extension)
  set(_full_name "${_name}.${_extension}")
  # CMAKE_CURRENT_LIST_FILE contains the full path to the file currently being
  # processed.  Using this variable, we can pull out the current path, and
  # provide a way to get access to the other files we need local to here.
  get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
  find_file(CUDA_${_name} ${_full_name} PATHS ${CMAKE_CURRENT_LIST_DIR}/FindCUDA NO_DEFAULT_PATH)
  if(NOT CUDA_${_name})
    set(error_message "${_full_name} not found in CMAKE_MODULE_PATH")
    if(CUDA_FIND_REQUIRED)
      message(FATAL_ERROR "${error_message}")
    else(CUDA_FIND_REQUIRED)
      if(NOT CUDA_FIND_QUIETLY)
        message(STATUS "${error_message}")
      endif(NOT CUDA_FIND_QUIETLY)
    endif(CUDA_FIND_REQUIRED)
  endif(NOT CUDA_${_name})
  # Set this variable as internal, so the user isn't bugged with it.
  set(CUDA_${_name} ${CUDA_${_name}} CACHE INTERNAL "Location of ${_full_name}" FORCE)
endmacro(CUDA_FIND_HELPER_FILE)

#####################################################################
## CUDA_INCLUDE_NVCC_DEPENDENCIES
##
# So we want to try and include the dependency file if it exists.  If
# it doesn't exist then we need to create an empty one, so we can
# include it.

# If it does exist, then we need to check to see if all the files it
# depends on exist.  If they don't then we should clear the dependency
# file and regenerate it later.  This covers the case where a header
# file has disappeared or moved.

macro(CUDA_INCLUDE_NVCC_DEPENDENCIES dependency_file)
  set(CUDA_NVCC_DEPEND)
  set(CUDA_NVCC_DEPEND_REGENERATE FALSE)

  # Include the dependency file.  Create it first if it doesn't exist.  The
  # INCLUDE puts a dependency that will force CMake to rerun and bring in the
  # new info when it changes.  DO NOT REMOVE THIS (as I did, and spent a few
  # hours figuring out why it didn't work).
  if(NOT EXISTS ${dependency_file})
    file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n")
  endif()
  # Always include this file to force CMake to run again next
  # invocation and rebuild the dependencies.
  #message("including dependency_file = ${dependency_file}")
  include(${dependency_file})

  # Now we need to verify the existence of all the included files
  # here.  If they aren't there we need to just blank this variable and
  # make the file regenerate again.
#   if(DEFINED CUDA_NVCC_DEPEND)
#     message("CUDA_NVCC_DEPEND set")
#   else()
#     message("CUDA_NVCC_DEPEND NOT set")
#   endif()
  if(CUDA_NVCC_DEPEND)
    #message("CUDA_NVCC_DEPEND true")
    foreach(f ${CUDA_NVCC_DEPEND})
      #message("searching for ${f}")
      if(NOT EXISTS ${f})
        #message("file ${f} not found")
        set(CUDA_NVCC_DEPEND_REGENERATE TRUE)
      endif()
    endforeach(f)
  else(CUDA_NVCC_DEPEND)
    #message("CUDA_NVCC_DEPEND false")
    # No dependencies, so regenerate the file.
    set(CUDA_NVCC_DEPEND_REGENERATE TRUE)
  endif(CUDA_NVCC_DEPEND)

  #message("CUDA_NVCC_DEPEND_REGENERATE = ${CUDA_NVCC_DEPEND_REGENERATE}")
  # No incoming dependencies, so we need to generate them.  Make the
  # output depend on the dependency file itself, which should cause the
  # rule to re-run.
  if(CUDA_NVCC_DEPEND_REGENERATE)
    file(WRITE ${dependency_file} "#FindCUDA.cmake generated file. Do not edit.\n")
  endif(CUDA_NVCC_DEPEND_REGENERATE)

endmacro(CUDA_INCLUDE_NVCC_DEPENDENCIES)

###############################################################################
###############################################################################
# Setup variables' defaults
###############################################################################
###############################################################################

# Allow the user to specify if the device code is supposed to be 32 or 64 bit.
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(CUDA_64_BIT_DEVICE_CODE_DEFAULT ON)
else()
  set(CUDA_64_BIT_DEVICE_CODE_DEFAULT OFF)
endif()
option(CUDA_64_BIT_DEVICE_CODE "Compile device code in 64 bit mode" ${CUDA_64_BIT_DEVICE_CODE_DEFAULT})

# Attach the build rule to the source file in VS.
option(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE "Attach the build rule to the CUDA source file. Enable only when the CUDA source file is added to at most one target." ON)

# Prints out extra information about the cuda file during compilation
option(CUDA_BUILD_CUBIN "Generate and parse .cubin files in Device mode." OFF)

# Set whether we are using emulation or device mode.
option(CUDA_BUILD_EMULATION "Build in Emulation mode" OFF)

# Where to put the generated output.
set(CUDA_GENERATED_OUTPUT_DIR "" CACHE PATH "Directory to put all the output files. If blank it will default to the CMAKE_CURRENT_BINARY_DIR")

# Parse HOST_COMPILATION mode.  When ON, host-side code generated by nvcc is
# compiled as C++ (CUDA_C_OR_CXX is derived from this below).
option(CUDA_HOST_COMPILATION_CPP "Generated file extension" ON)

# Extra user settable flags
set(CUDA_NVCC_FLAGS "" CACHE STRING "Semi-colon delimit multiple arguments.")

# Propagate the host flags to the host compiler via -Xcompiler
# (fixed doc string: flag is -Xcompiler, not -Xcompile)
option(CUDA_PROPAGATE_HOST_FLAGS "Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompiler" ON)

# Specifies whether the commands used when compiling the .cu file will be printed out.
option(CUDA_VERBOSE_BUILD "Print out the commands run while compiling the CUDA source file. With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF)

mark_as_advanced(
  CUDA_64_BIT_DEVICE_CODE
  CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE
  CUDA_GENERATED_OUTPUT_DIR
  CUDA_HOST_COMPILATION_CPP
  CUDA_NVCC_FLAGS
  CUDA_PROPAGATE_HOST_FLAGS
  )

# Makefile and similar generators don't define CMAKE_CONFIGURATION_TYPES, so we
# need to add another entry for the CMAKE_BUILD_TYPE.  We also need to add the
# standard set of 4 build types (Debug, MinSizeRel, Release, and RelWithDebInfo)
# for completeness.  We need run this loop in order to accommodate the addition
# of extra configuration types.  Duplicate entries will be removed by
# REMOVE_DUPLICATES.
set(CUDA_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo)
list(REMOVE_DUPLICATES CUDA_configuration_types)
foreach(config ${CUDA_configuration_types})
    string(TOUPPER ${config} config_upper)
    set(CUDA_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semi-colon delimit multiple arguments.")
    mark_as_advanced(CUDA_NVCC_FLAGS_${config_upper})
endforeach()

###############################################################################
###############################################################################
# Locate CUDA, Set Build Type, etc.
###############################################################################
###############################################################################

# Check to see if the CUDA_TOOLKIT_ROOT_DIR and CUDA_SDK_ROOT_DIR have changed,
# if they have then clear the cache variables, so that will be detected again.
# If the cached toolkit root no longer matches the value it was last found
# with, wipe every cache entry derived from it so they are re-detected.
if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}")
  unset(CUDA_NVCC_EXECUTABLE CACHE)
  unset(CUDA_VERSION CACHE)
  unset(CUDA_TOOLKIT_INCLUDE CACHE)
  unset(CUDA_CUDART_LIBRARY CACHE)
  unset(CUDA_CUDA_LIBRARY CACHE)
  unset(CUDA_cublas_LIBRARY CACHE)
  unset(CUDA_cublasemu_LIBRARY CACHE)
  unset(CUDA_cufft_LIBRARY CACHE)
  unset(CUDA_cufftemu_LIBRARY CACHE)
endif()

if(NOT "${CUDA_SDK_ROOT_DIR}" STREQUAL "${CUDA_SDK_ROOT_DIR_INTERNAL}")
  # No specific variables to catch.  Use this kind of code before calling
  # find_package(CUDA) to clean up any variables that may depend on this path.
  #   unset(MY_SPECIAL_CUDA_SDK_INCLUDE_DIR CACHE)
  #   unset(MY_SPECIAL_CUDA_SDK_LIBRARY CACHE)
endif()

# Search for the cuda distribution.
if(NOT CUDA_TOOLKIT_ROOT_DIR)

  # Search in the CUDA_BIN_PATH first (NO_DEFAULT_PATH so the environment
  # variable wins over any system install).
  find_path(CUDA_TOOLKIT_ROOT_DIR
    NAMES nvcc nvcc.exe
    PATHS ENV CUDA_BIN_PATH
    DOC "Toolkit location."
    NO_DEFAULT_PATH
    )
  # Now search default paths
  find_path(CUDA_TOOLKIT_ROOT_DIR
    NAMES nvcc nvcc.exe
    PATHS /usr/local/bin
          /usr/local/cuda/bin
    DOC "Toolkit location."
    )

  if (CUDA_TOOLKIT_ROOT_DIR)
    # find_path located the bin directory containing nvcc; strip a trailing
    # "bin" or "bin64" component to get the toolkit root itself.
    string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR})
    # We need to force this back into the cache.
    set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR} CACHE PATH "Toolkit location." FORCE)
  endif(CUDA_TOOLKIT_ROOT_DIR)
  if (NOT EXISTS ${CUDA_TOOLKIT_ROOT_DIR})
    if(CUDA_FIND_REQUIRED)
      message(FATAL_ERROR "Specify CUDA_TOOLKIT_ROOT_DIR")
    elseif(NOT CUDA_FIND_QUIETLY)
      message(STATUS "CUDA_TOOLKIT_ROOT_DIR not found or specified")
    endif()
  endif (NOT EXISTS ${CUDA_TOOLKIT_ROOT_DIR})
endif (NOT CUDA_TOOLKIT_ROOT_DIR)

# CUDA_NVCC_EXECUTABLE: prefer the toolkit's own bin dirs and CUDA_BIN_PATH.
find_program(CUDA_NVCC_EXECUTABLE
  NAMES nvcc
  PATHS "${CUDA_TOOLKIT_ROOT_DIR}/bin"
        "${CUDA_TOOLKIT_ROOT_DIR}/bin64"
  ENV CUDA_BIN_PATH
  NO_DEFAULT_PATH
  )
# Search default search paths, after we search our own set of paths.
find_program(CUDA_NVCC_EXECUTABLE nvcc)
mark_as_advanced(CUDA_NVCC_EXECUTABLE)

if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION)
  # Compute the version by parsing the "release X.Y" token out of
  # `nvcc --version` output.
  execute_process (COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
  string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${NVCC_OUT})
  string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT})
  set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.")
  mark_as_advanced(CUDA_VERSION)
endif()

# Always set this convenience variable
set(CUDA_VERSION_STRING "${CUDA_VERSION}")

# Here we need to determine if the version we found is acceptable.  We will
# assume that is unless CUDA_FIND_VERSION_EXACT or CUDA_FIND_VERSION is
# specified.  The presence of either of these options checks the version
# string and signals if the version is acceptable or not.
set(_cuda_version_acceptable TRUE)
#
if(CUDA_FIND_VERSION_EXACT AND NOT CUDA_VERSION VERSION_EQUAL CUDA_FIND_VERSION)
  set(_cuda_version_acceptable FALSE)
endif()
#
if(CUDA_FIND_VERSION AND CUDA_VERSION VERSION_LESS CUDA_FIND_VERSION)
  set(_cuda_version_acceptable FALSE)
endif()
#
if(NOT _cuda_version_acceptable)
  # Only warn here; the actual failure (when REQUIRED) is produced by
  # find_package_handle_standard_args via _cuda_version_acceptable below.
  set(_cuda_error_message "Requested CUDA version ${CUDA_FIND_VERSION}, but found unacceptable version ${CUDA_VERSION}")
  if(CUDA_FIND_REQUIRED)
    message("${_cuda_error_message}")
  elseif(NOT CUDA_FIND_QUIETLY)
    message("${_cuda_error_message}")
  endif()
endif()

# CUDA_TOOLKIT_INCLUDE
find_path(CUDA_TOOLKIT_INCLUDE
  device_functions.h # Header included in toolkit
  PATHS "${CUDA_TOOLKIT_ROOT_DIR}/include"
  ENV CUDA_INC_PATH
  NO_DEFAULT_PATH
  )
# Search default search paths, after we search our own set of paths.
find_path(CUDA_TOOLKIT_INCLUDE device_functions.h)
mark_as_advanced(CUDA_TOOLKIT_INCLUDE)

# Set the user list of include dir to nothing to initialize it.
set (CUDA_NVCC_INCLUDE_ARGS_USER "")
set (CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE})

# Find a library inside the toolkit tree (lib64 first on 64-bit hosts, then
# lib, then CUDA_LIB_PATH) before falling back to the system default paths.
macro(FIND_LIBRARY_LOCAL_FIRST _var _names _doc)
  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(_cuda_64bit_lib_dir "${CUDA_TOOLKIT_ROOT_DIR}/lib64")
  endif()
  find_library(${_var}
    NAMES ${_names}
    PATHS ${_cuda_64bit_lib_dir} "${CUDA_TOOLKIT_ROOT_DIR}/lib"
    ENV CUDA_LIB_PATH
    DOC ${_doc}
    NO_DEFAULT_PATH
    )
  # Search default search paths, after we search our own set of paths.
  find_library(${_var} NAMES ${_names} DOC ${_doc})
endmacro()

# CUDA_LIBRARIES
find_library_local_first(CUDA_CUDART_LIBRARY cudart "\"cudart\" library")
set(CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY})
if(APPLE)
  # We need to add the path to cudart to the linker using rpath, since the
  # library name for the cuda libraries is prepended with @rpath.
  get_filename_component(_cuda_path_to_cudart "${CUDA_CUDART_LIBRARY}" PATH)
  if(_cuda_path_to_cudart)
    list(APPEND CUDA_LIBRARIES -Wl,-rpath "-Wl,${_cuda_path_to_cudart}")
  endif()
endif()

# 1.1 toolkit on linux doesn't appear to have a separate library on
# some platforms.
find_library_local_first(CUDA_CUDA_LIBRARY cuda "\"cuda\" library (older versions only).")

# Add cuda library to the link line only if it is found.
if (CUDA_CUDA_LIBRARY)
  set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
endif(CUDA_CUDA_LIBRARY)

mark_as_advanced(
  CUDA_CUDA_LIBRARY
  CUDA_CUDART_LIBRARY
  )

#######################
# Look for some of the toolkit helper libraries.  Defines and hides
# CUDA_<name>_LIBRARY for each requested library name.
macro(FIND_CUDA_HELPER_LIBS _name)
  find_library_local_first(CUDA_${_name}_LIBRARY ${_name} "\"${_name}\" library")
  mark_as_advanced(CUDA_${_name}_LIBRARY)
endmacro(FIND_CUDA_HELPER_LIBS)

# Search for cufft and cublas libraries.
find_cuda_helper_libs(cufftemu)
find_cuda_helper_libs(cublasemu)
find_cuda_helper_libs(cufft)
find_cuda_helper_libs(cublas)

# Pick the emulation or device variants depending on the build mode.
if (CUDA_BUILD_EMULATION)
  set(CUDA_CUFFT_LIBRARIES ${CUDA_cufftemu_LIBRARY})
  set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublasemu_LIBRARY})
else()
  set(CUDA_CUFFT_LIBRARIES ${CUDA_cufft_LIBRARY})
  set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY})
endif()

########################
# Look for the SDK stuff
find_path(CUDA_SDK_ROOT_DIR common/inc/cutil.h
  "$ENV{NVSDKCUDA_ROOT}"
  "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]"
  "/Developer/GPU\ Computing/C"
  )

# Keep the CUDA_SDK_ROOT_DIR first in order to be able to override the
# environment variables.
set(CUDA_SDK_SEARCH_PATH
  "${CUDA_SDK_ROOT_DIR}"
  "${CUDA_TOOLKIT_ROOT_DIR}/local/NVSDK0.2"
  "${CUDA_TOOLKIT_ROOT_DIR}/NVSDK0.2"
  "${CUDA_TOOLKIT_ROOT_DIR}/NV_CUDA_SDK"
  "$ENV{HOME}/NVIDIA_CUDA_SDK"
  "$ENV{HOME}/NVIDIA_CUDA_SDK_MACOSX"
  "/Developer/CUDA"
  )

# Example of how to find an include file from the CUDA_SDK_ROOT_DIR
# find_path(CUDA_CUT_INCLUDE_DIR
#   cutil.h
#   PATHS ${CUDA_SDK_SEARCH_PATH}
#   PATH_SUFFIXES "common/inc"
#   DOC "Location of cutil.h"
#   NO_DEFAULT_PATH
#   )
# # Now search system paths
# find_path(CUDA_CUT_INCLUDE_DIR cutil.h DOC "Location of cutil.h")
# mark_as_advanced(CUDA_CUT_INCLUDE_DIR)

# Example of how to find a library in the CUDA_SDK_ROOT_DIR
# # cutil library is called cutil64 for 64 bit builds on windows.  We don't want
# # to get these confused, so we are setting the name based on the word size of
# # the build.
# if(CMAKE_SIZEOF_VOID_P EQUAL 8)
#   set(cuda_cutil_name cutil64)
# else(CMAKE_SIZEOF_VOID_P EQUAL 8)
#   set(cuda_cutil_name cutil32)
# endif(CMAKE_SIZEOF_VOID_P EQUAL 8)
# find_library(CUDA_CUT_LIBRARY
#   NAMES cutil ${cuda_cutil_name}
#   PATHS ${CUDA_SDK_SEARCH_PATH}
#   # The new version of the sdk shows up in common/lib, but the old one is in lib
#   PATH_SUFFIXES "common/lib" "lib"
#   DOC "Location of cutil library"
#   NO_DEFAULT_PATH
#   )
# # Now search system paths
# find_library(CUDA_CUT_LIBRARY NAMES cutil ${cuda_cutil_name} DOC "Location of cutil library")
# mark_as_advanced(CUDA_CUT_LIBRARY)
# set(CUDA_CUT_LIBRARIES ${CUDA_CUT_LIBRARY})

#############################
# Check for required components
set(CUDA_FOUND TRUE)

# Remember the values the search succeeded with, so the invalidation logic at
# the top of this module can detect when the user changes them.
set(CUDA_TOOLKIT_ROOT_DIR_INTERNAL "${CUDA_TOOLKIT_ROOT_DIR}" CACHE INTERNAL
  "This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was set successfully." FORCE)
set(CUDA_SDK_ROOT_DIR_INTERNAL "${CUDA_SDK_ROOT_DIR}" CACHE INTERNAL
  "This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully." FORCE)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(CUDA DEFAULT_MSG
  CUDA_TOOLKIT_ROOT_DIR
  CUDA_NVCC_EXECUTABLE
  CUDA_INCLUDE_DIRS
  CUDA_CUDART_LIBRARY
  _cuda_version_acceptable
  )

###############################################################################
###############################################################################
# Macros
###############################################################################
###############################################################################

###############################################################################
# Add include directories to pass to the nvcc command.
# Accumulate user -I arguments for nvcc (analogous to include_directories()).
macro(CUDA_INCLUDE_DIRECTORIES)
  foreach(dir ${ARGN})
    list(APPEND CUDA_NVCC_INCLUDE_ARGS_USER "-I${dir}")
  endforeach(dir ${ARGN})
endmacro(CUDA_INCLUDE_DIRECTORIES)


##############################################################################
cuda_find_helper_file(parse_cubin cmake)
cuda_find_helper_file(make2cmake cmake)
cuda_find_helper_file(run_nvcc cmake)

##############################################################################
# Separate the OPTIONS out from the sources.  Everything before the OPTIONS
# keyword is a source (or a recognized add_library/add_executable keyword);
# everything after it is an nvcc option.
#
macro(CUDA_GET_SOURCES_AND_OPTIONS _sources _cmake_options _options)
  set( ${_sources} )
  set( ${_cmake_options} )
  set( ${_options} )
  set( _found_options FALSE )
  foreach(arg ${ARGN})
    if(arg STREQUAL "OPTIONS")
      set( _found_options TRUE )
    elseif(
        arg STREQUAL "WIN32" OR
        arg STREQUAL "MACOSX_BUNDLE" OR
        arg STREQUAL "EXCLUDE_FROM_ALL" OR
        arg STREQUAL "STATIC" OR
        arg STREQUAL "SHARED" OR
        arg STREQUAL "MODULE" )
      list(APPEND ${_cmake_options} "${arg}")
    else()
      if ( _found_options )
        list(APPEND ${_options} "${arg}")
      else()
        # Assume this is a file
        list(APPEND ${_sources} "${arg}")
      endif()
    endif()
  endforeach()
endmacro()

##############################################################################
# Parse the OPTIONS from ARGN and set the variables prefixed by _option_prefix
#
macro(CUDA_PARSE_NVCC_OPTIONS _option_prefix)
  set( _found_config )
  foreach(arg ${ARGN})
    # Determine if we are dealing with a per-configuration flag
    foreach(config ${CUDA_configuration_types})
      string(TOUPPER ${config} config_upper)
      if (arg STREQUAL "${config_upper}")
        set( _found_config _${arg})
        # Set arg to nothing to keep it from being processed further
        set( arg )
      endif()
    endforeach()

    if ( arg )
      list(APPEND ${_option_prefix}${_found_config} "${arg}")
    endif()
  endforeach()
endmacro()

##############################################################################
# Helper to add the include directory for CUDA only once
function(CUDA_ADD_CUDA_INCLUDE_ONCE)
  get_directory_property(_include_directories INCLUDE_DIRECTORIES)
  set(_add TRUE)
  if(_include_directories)
    foreach(dir ${_include_directories})
      if("${dir}" STREQUAL "${CUDA_INCLUDE_DIRS}")
        set(_add FALSE)
      endif()
    endforeach()
  endif()
  if(_add)
    include_directories(${CUDA_INCLUDE_DIRS})
  endif()
endfunction()

# Determine whether to build SHARED or STATIC: honors an explicit
# SHARED/MODULE/STATIC argument first, otherwise falls back to
# BUILD_SHARED_LIBS.  Result is returned in ${shared_flag}.
function(CUDA_BUILD_SHARED_LIBRARY shared_flag)
  set(cmake_args ${ARGN})
  # If SHARED, MODULE, or STATIC aren't already in the list of arguments, then
  # add SHARED or STATIC based on the value of BUILD_SHARED_LIBS.
  list(FIND cmake_args SHARED _cuda_found_SHARED)
  list(FIND cmake_args MODULE _cuda_found_MODULE)
  list(FIND cmake_args STATIC _cuda_found_STATIC)
  if( _cuda_found_SHARED GREATER -1 OR
      _cuda_found_MODULE GREATER -1 OR
      _cuda_found_STATIC GREATER -1)
    set(_cuda_build_shared_libs)
  else()
    if (BUILD_SHARED_LIBS)
      set(_cuda_build_shared_libs SHARED)
    else()
      set(_cuda_build_shared_libs STATIC)
    endif()
  endif()
  set(${shared_flag} ${_cuda_build_shared_libs} PARENT_SCOPE)
endfunction()

##############################################################################
# This helper macro populates the following variables and sets up custom
# commands and targets to invoke the nvcc compiler to generate C or PTX source
# dependent upon the format parameter.  The compiler is invoked once with -M
# to generate a dependency file and a second time with -cuda or -ptx to generate
# a .cpp or .ptx file.
# INPUT:
#   cuda_target         - Target name
#   format              - PTX or OBJ
#   FILE1 .. FILEN      - The remaining arguments are the sources to be wrapped.
#   OPTIONS             - Extra options to NVCC
# OUTPUT:
#   generated_files     - List of generated files
##############################################################################
##############################################################################

macro(CUDA_WRAP_SRCS cuda_target format generated_files)

  if( ${format} MATCHES "PTX" )
    set( compile_to_ptx ON )
  elseif( ${format} MATCHES "OBJ")
    set( compile_to_ptx OFF )
  else()
    message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS: '${format}'. Use OBJ or PTX.")
  endif()

  # Set up all the command line flags here, so that they can be overridden on a per target basis.

  set(nvcc_flags "")

  # Emulation if the card isn't present.
  if (CUDA_BUILD_EMULATION)
    # Emulation.
    set(nvcc_flags ${nvcc_flags} --device-emulation -D_DEVICEEMU -g)
  else(CUDA_BUILD_EMULATION)
    # Device mode.  No flags necessary.
  endif(CUDA_BUILD_EMULATION)

  if(CUDA_HOST_COMPILATION_CPP)
    set(CUDA_C_OR_CXX CXX)
  else(CUDA_HOST_COMPILATION_CPP)
    if(CUDA_VERSION VERSION_LESS "3.0")
      set(nvcc_flags ${nvcc_flags} --host-compilation C)
    else()
      message(WARNING "--host-compilation flag is deprecated in CUDA version >= 3.0. Removing --host-compilation C flag" )
    endif()
    set(CUDA_C_OR_CXX C)
  endif(CUDA_HOST_COMPILATION_CPP)

  set(generated_extension ${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION})

  if(CUDA_64_BIT_DEVICE_CODE)
    set(nvcc_flags ${nvcc_flags} -m64)
  else()
    set(nvcc_flags ${nvcc_flags} -m32)
  endif()

  # This needs to be passed in at this stage, because VS needs to fill out the
  # value of VCInstallDir from within VS.
  if(CMAKE_GENERATOR MATCHES "Visual Studio")
    if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
      # Add nvcc flag for 64b Windows
      set(ccbin_flags -D "\"CCBIN:PATH=$(VCInstallDir)bin\"" )
    endif()
  endif()

  # Figure out which configure we will use and pass that in as an argument to
  # the script.  We need to defer the decision until compilation time, because
  # for VS projects we won't know if we are making a debug or release build
  # until build time.
  if(CMAKE_GENERATOR MATCHES "Visual Studio")
    set( CUDA_build_configuration "$(ConfigurationName)" )
  else()
    set( CUDA_build_configuration "${CMAKE_BUILD_TYPE}")
  endif()

  # Initialize our list of includes with the user ones followed by the CUDA system ones.
  set(CUDA_NVCC_INCLUDE_ARGS ${CUDA_NVCC_INCLUDE_ARGS_USER} "-I${CUDA_INCLUDE_DIRS}")
  # Get the include directories for this directory and use them for our nvcc command.
  get_directory_property(CUDA_NVCC_INCLUDE_DIRECTORIES INCLUDE_DIRECTORIES)
  if(CUDA_NVCC_INCLUDE_DIRECTORIES)
    foreach(dir ${CUDA_NVCC_INCLUDE_DIRECTORIES})
      list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}")
    endforeach()
  endif()

  # Reset these variables
  set(CUDA_WRAP_OPTION_NVCC_FLAGS)
  foreach(config ${CUDA_configuration_types})
    string(TOUPPER ${config} config_upper)
    set(CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper})
  endforeach()

  CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_cmake_options _cuda_wrap_options ${ARGN})
  CUDA_PARSE_NVCC_OPTIONS(CUDA_WRAP_OPTION_NVCC_FLAGS ${_cuda_wrap_options})

  # Figure out if we are building a shared library.  BUILD_SHARED_LIBS is
  # respected in CUDA_ADD_LIBRARY.
  set(_cuda_build_shared_libs FALSE)
  # SHARED, MODULE
  list(FIND _cuda_wrap_cmake_options SHARED _cuda_found_SHARED)
  list(FIND _cuda_wrap_cmake_options MODULE _cuda_found_MODULE)
  if(_cuda_found_SHARED GREATER -1 OR _cuda_found_MODULE GREATER -1)
    set(_cuda_build_shared_libs TRUE)
  endif()
  # STATIC
  list(FIND _cuda_wrap_cmake_options STATIC _cuda_found_STATIC)
  if(_cuda_found_STATIC GREATER -1)
    set(_cuda_build_shared_libs FALSE)
  endif()

  # CUDA_HOST_FLAGS
  if(_cuda_build_shared_libs)
    # If we are setting up code for a shared library, then we need to add extra flags for
    # compiling objects for shared libraries.
    set(CUDA_HOST_SHARED_FLAGS ${CMAKE_SHARED_LIBRARY_${CUDA_C_OR_CXX}_FLAGS})
  else()
    set(CUDA_HOST_SHARED_FLAGS)
  endif()

  # Only add the CMAKE_{C,CXX}_FLAGS if we are propagating host flags.  We
  # always need to set the SHARED_FLAGS, though.
  if(CUDA_PROPAGATE_HOST_FLAGS)
    set(CUDA_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CMAKE_${CUDA_C_OR_CXX}_FLAGS} ${CUDA_HOST_SHARED_FLAGS})")
  else()
    set(CUDA_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CUDA_HOST_SHARED_FLAGS})")
  endif()

  set(CUDA_NVCC_FLAGS_CONFIG "# Build specific configuration flags")
  # Loop over all the configuration types to generate appropriate flags for run_nvcc.cmake
  foreach(config ${CUDA_configuration_types})
    string(TOUPPER ${config} config_upper)
    # CMAKE_FLAGS are strings and not lists.  By not putting quotes around CMAKE_FLAGS
    # we convert the strings to lists (like we want).

    if(CUDA_PROPAGATE_HOST_FLAGS)
      # nvcc chokes on -g3, so replace it with -g
      if(CMAKE_COMPILER_IS_GNUCC)
        string(REPLACE "-g3" "-g" _cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}")
      else()
        set(_cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}")
      endif()
      set(CUDA_HOST_FLAGS "${CUDA_HOST_FLAGS}\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})")
    endif()

    # Note that if we ever want CUDA_NVCC_FLAGS_ to be string (instead of a list
    # like it is currently), we can remove the quotes around the
    # ${CUDA_NVCC_FLAGS_${config_upper}} variable like the CMAKE_HOST_FLAGS_ variable.
    set(CUDA_NVCC_FLAGS_CONFIG "${CUDA_NVCC_FLAGS_CONFIG}\nset(CUDA_NVCC_FLAGS_${config_upper} \"${CUDA_NVCC_FLAGS_${config_upper}};;${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}}\")")
  endforeach()

  if(compile_to_ptx)
    # Don't use any of the host compilation flags for PTX targets.
    set(CUDA_HOST_FLAGS)
    set(CUDA_NVCC_FLAGS_CONFIG)
  endif()

  # Get the list of definitions from the directory property
  get_directory_property(CUDA_NVCC_DEFINITIONS COMPILE_DEFINITIONS)
  if(CUDA_NVCC_DEFINITIONS)
    foreach(_definition ${CUDA_NVCC_DEFINITIONS})
      list(APPEND nvcc_flags "-D${_definition}")
    endforeach()
  endif()

  if(_cuda_build_shared_libs)
    # Mirror CMake's <target>_EXPORTS define used for dllexport-style macros.
    list(APPEND nvcc_flags "-D${cuda_target}_EXPORTS")
  endif()

  # Determine output directory
  if(CUDA_GENERATED_OUTPUT_DIR)
    set(cuda_compile_output_dir "${CUDA_GENERATED_OUTPUT_DIR}")
  else()
    set(cuda_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}")
  endif()

  # Reset the output variable
  set(_cuda_wrap_generated_files "")

  # Iterate over the macro arguments and create custom
  # commands for all the .cu files.
  foreach(file ${ARGN})
    # Ignore any file marked as a HEADER_FILE_ONLY
    get_source_file_property(_is_header ${file} HEADER_FILE_ONLY)
    if(${file} MATCHES ".*\\.cu$" AND NOT _is_header)

      # Add a custom target to generate a c or ptx file. ######################

      get_filename_component( basename ${file} NAME )
      if( compile_to_ptx )
        set(generated_file_path "${cuda_compile_output_dir}")
        set(generated_file_basename "${cuda_target}_generated_${basename}.ptx")
        set(format_flag "-ptx")
        file(MAKE_DIRECTORY "${cuda_compile_output_dir}")
      else( compile_to_ptx )
        set(generated_file_path "${cuda_compile_output_dir}/${CMAKE_CFG_INTDIR}")
        set(generated_file_basename "${cuda_target}_generated_${basename}${generated_extension}")
        set(format_flag "-c")
      endif( compile_to_ptx )

      # Set all of our file names.  Make sure that whatever filenames that have
      # generated_file_path in them get passed in through as a command line
      # argument, so that the ${CMAKE_CFG_INTDIR} gets expanded at run time
      # instead of configure time.
      set(generated_file "${generated_file_path}/${generated_file_basename}")
      set(cmake_dependency_file "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${generated_file_basename}.depend")
      set(NVCC_generated_dependency_file "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${generated_file_basename}.NVCC-depend")
      set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt")
      set(custom_target_script "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${generated_file_basename}.cmake")

      # Setup properties for obj files:
      if( NOT compile_to_ptx )
        set_source_files_properties("${generated_file}"
          PROPERTIES
          EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked.
          )
      endif()

      # Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path.
      get_filename_component(file_path "${file}" PATH)
      if(IS_ABSOLUTE "${file_path}")
        set(source_file "${file}")
      else()
        set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}")
      endif()

      # Bring in the dependencies.  Creates a variable CUDA_NVCC_DEPEND #######
      cuda_include_nvcc_dependencies(${cmake_dependency_file})

      # Convenience string for output #########################################
      if(CUDA_BUILD_EMULATION)
        set(cuda_build_type "Emulation")
      else(CUDA_BUILD_EMULATION)
        set(cuda_build_type "Device")
      endif(CUDA_BUILD_EMULATION)

      # Build the NVCC made dependency file ###################################
      set(build_cubin OFF)
      if ( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN )
         if ( NOT compile_to_ptx )
           set ( build_cubin ON )
         endif( NOT compile_to_ptx )
      endif( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN )

      # Configure the build script
      configure_file("${CUDA_run_nvcc}" "${custom_target_script}" @ONLY)

      # So if a user specifies the same cuda file as input more than once, you
      # can have bad things happen with dependencies.  Here we check an option
      # to see if this is the behavior they want.
      if(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE)
        set(main_dep MAIN_DEPENDENCY ${source_file})
      else()
        set(main_dep DEPENDS ${source_file})
      endif()

      if(CUDA_VERBOSE_BUILD)
        set(verbose_output ON)
      elseif(CMAKE_GENERATOR MATCHES "Makefiles")
        set(verbose_output "$(VERBOSE)")
      else()
        set(verbose_output OFF)
      endif()

      # Create up the comment string
      file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}")
      if(compile_to_ptx)
        set(cuda_build_comment_string "Building NVCC ptx file ${generated_file_relative_path}")
      else()
        set(cuda_build_comment_string "Building NVCC (${cuda_build_type}) object ${generated_file_relative_path}")
      endif()

      # Build the generated file and dependency file ##########################
      add_custom_command(
        OUTPUT ${generated_file}
        # These output files depend on the source_file and the contents of cmake_dependency_file
        ${main_dep}
        DEPENDS ${CUDA_NVCC_DEPEND}
        DEPENDS ${custom_target_script}
        # Make sure the output directory exists before trying to write to it.
        COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}"
        COMMAND ${CMAKE_COMMAND} ARGS
          -D verbose:BOOL=${verbose_output}
          ${ccbin_flags}
          -D build_configuration:STRING=${CUDA_build_configuration}
          -D "generated_file:STRING=${generated_file}"
          -D "generated_cubin_file:STRING=${generated_cubin_file}"
          -P "${custom_target_script}"
        COMMENT "${cuda_build_comment_string}"
        )

      # Make sure the build system knows the file is generated.
      set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE)

      # Don't add the object file to the list of generated files if we are using
      # visual studio and we are attaching the build rule to the cuda file.  VS
      # will add our object file to the linker automatically for us.
      set(cuda_add_generated_file TRUE)

      if(NOT compile_to_ptx AND CMAKE_GENERATOR MATCHES "Visual Studio" AND CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE)
        # Visual Studio 8 crashes when you close the solution when you don't add the object file.
        if(NOT CMAKE_GENERATOR MATCHES "Visual Studio 8")
          #message("Not adding ${generated_file}")
          set(cuda_add_generated_file FALSE)
        endif()
      endif()

      if(cuda_add_generated_file)
        list(APPEND _cuda_wrap_generated_files ${generated_file})
      endif()

      # Add the other files that we want cmake to clean on a cleanup ##########
      list(APPEND CUDA_ADDITIONAL_CLEAN_FILES "${cmake_dependency_file}")
      list(REMOVE_DUPLICATES CUDA_ADDITIONAL_CLEAN_FILES)
      set(CUDA_ADDITIONAL_CLEAN_FILES ${CUDA_ADDITIONAL_CLEAN_FILES} CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.")

    endif(${file} MATCHES ".*\\.cu$" AND NOT _is_header)
  endforeach(file)

  # Set the return parameter
  set(${generated_files} ${_cuda_wrap_generated_files})
endmacro(CUDA_WRAP_SRCS)

###############################################################################
###############################################################################
# ADD LIBRARY
###############################################################################
###############################################################################
macro(CUDA_ADD_LIBRARY cuda_target)

  CUDA_ADD_CUDA_INCLUDE_ONCE()

  # Separate the sources from the options
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})
  CUDA_BUILD_SHARED_LIBRARY(_cuda_shared_flag ${ARGN})
  # Create custom commands and targets for each file.
  CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources} ${_cmake_options}
    ${_cuda_shared_flag}
    OPTIONS ${_options} )

  # Add the library.
  add_library(${cuda_target} ${_cmake_options}
    ${_generated_files}
    ${_sources}
    )

  target_link_libraries(${cuda_target}
    ${CUDA_LIBRARIES}
    )

  # We need to set the linker language based on what the expected generated file
  # would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP.
  set_target_properties(${cuda_target}
    PROPERTIES
    LINKER_LANGUAGE ${CUDA_C_OR_CXX}
    )

endmacro(CUDA_ADD_LIBRARY cuda_target)

###############################################################################
###############################################################################
# ADD EXECUTABLE
###############################################################################
###############################################################################
macro(CUDA_ADD_EXECUTABLE cuda_target)

  CUDA_ADD_CUDA_INCLUDE_ONCE()

  # Separate the sources from the options
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})
  # Create custom commands and targets for each file.
  CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources} OPTIONS ${_options} )

  # Add the library.
  add_executable(${cuda_target} ${_cmake_options}
    ${_generated_files}
    ${_sources}
    )

  target_link_libraries(${cuda_target}
    ${CUDA_LIBRARIES}
    )

  # We need to set the linker language based on what the expected generated file
  # would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP.
  set_target_properties(${cuda_target}
    PROPERTIES
    LINKER_LANGUAGE ${CUDA_C_OR_CXX}
    )

endmacro(CUDA_ADD_EXECUTABLE cuda_target)

###############################################################################
###############################################################################
# CUDA COMPILE
###############################################################################
###############################################################################
macro(CUDA_COMPILE generated_files)

  # Separate the sources from the options
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})
  # Create custom commands and targets for each file.
  CUDA_WRAP_SRCS( cuda_compile OBJ _generated_files ${_sources} ${_cmake_options}
    OPTIONS ${_options} )

  set( ${generated_files} ${_generated_files})

endmacro(CUDA_COMPILE)

###############################################################################
###############################################################################
# CUDA COMPILE PTX
###############################################################################
###############################################################################
macro(CUDA_COMPILE_PTX generated_files)

  # Separate the sources from the options
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})
  # Create custom commands and targets for each file.
  CUDA_WRAP_SRCS( cuda_compile_ptx PTX _generated_files ${_sources} ${_cmake_options}
    OPTIONS ${_options} )

  set( ${generated_files} ${_generated_files})

endmacro(CUDA_COMPILE_PTX)

###############################################################################
###############################################################################
# CUDA ADD CUFFT TO TARGET
###############################################################################
###############################################################################
macro(CUDA_ADD_CUFFT_TO_TARGET target)
  if (CUDA_BUILD_EMULATION)
    target_link_libraries(${target} ${CUDA_cufftemu_LIBRARY})
  else()
    target_link_libraries(${target} ${CUDA_cufft_LIBRARY})
  endif()
endmacro()

###############################################################################
###############################################################################
# CUDA ADD CUBLAS TO TARGET
###############################################################################
###############################################################################
macro(CUDA_ADD_CUBLAS_TO_TARGET target)
  if (CUDA_BUILD_EMULATION)
    target_link_libraries(${target} ${CUDA_cublasemu_LIBRARY})
  else()
    target_link_libraries(${target} ${CUDA_cublas_LIBRARY})
  endif()
endmacro()
###############################################################################
###############################################################################
# CUDA BUILD CLEAN TARGET
###############################################################################
###############################################################################
macro(CUDA_BUILD_CLEAN_TARGET)
  # Call this after you add all your CUDA targets, and you will get a convience
  # target.  You should also make clean after running this target to get the
  # build system to generate all the code again.
  set(cuda_clean_target_name clean_cuda_depends)
  if (CMAKE_GENERATOR MATCHES "Visual Studio")
    # Visual Studio target names are conventionally upper-case.
    string(TOUPPER ${cuda_clean_target_name} cuda_clean_target_name)
  endif()
  add_custom_target(${cuda_clean_target_name}
    COMMAND ${CMAKE_COMMAND} -E remove ${CUDA_ADDITIONAL_CLEAN_FILES})

  # Clear out the variable, so the next time we configure it will be empty.
  # This is useful so that the files won't persist in the list after targets
  # have been removed.
  set(CUDA_ADDITIONAL_CLEAN_FILES "" CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.")

endmacro(CUDA_BUILD_CLEAN_TARGET)



================================================
FILE: cmake/FindLibJpeg.cmake
================================================
# Copyright (c) 2009, Whispersoft s.r.l.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Whispersoft s.r.l. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Finds LibJpeg library
#
#  LibJpeg_INCLUDE_DIR - where to find jpeglib.h, etc.
#  LibJpeg_LIBRARIES   - List of libraries when using LibJpeg.
#  LibJpeg_FOUND       - True if LibJpeg found.
#

if (LibJpeg_INCLUDE_DIR)
  # Already in cache, be silent
  set(LibJpeg_FIND_QUIETLY TRUE)
endif (LibJpeg_INCLUDE_DIR)

find_path(LibJpeg_INCLUDE_DIR jpeglib.h
  /opt/local/include
  /usr/local/include
  /usr/include
)

set(LibJpeg_NAMES jpeg)
find_library(LibJpeg_LIBRARY
  NAMES ${LibJpeg_NAMES}
  PATHS /usr/lib /usr/local/lib /opt/local/lib
)

if (LibJpeg_INCLUDE_DIR AND LibJpeg_LIBRARY)
  set(LibJpeg_FOUND TRUE)
  set( LibJpeg_LIBRARIES ${LibJpeg_LIBRARY} )
else (LibJpeg_INCLUDE_DIR AND LibJpeg_LIBRARY)
  set(LibJpeg_FOUND FALSE)
  set(LibJpeg_LIBRARIES)
endif (LibJpeg_INCLUDE_DIR AND LibJpeg_LIBRARY)

if (LibJpeg_FOUND)
  if (NOT LibJpeg_FIND_QUIETLY)
    message(STATUS "Found LibJpeg: ${LibJpeg_LIBRARY}")
  endif (NOT LibJpeg_FIND_QUIETLY)
else (LibJpeg_FOUND)
  if (LibJpeg_FIND_REQUIRED)
    message(STATUS "Looked for LibJpeg libraries named ${LibJpeg_NAMES}.")
    message(STATUS "Include file detected: [${LibJpeg_INCLUDE_DIR}].")
    message(STATUS "Lib file detected: [${LibJpeg_LIBRARY}].")
    message(FATAL_ERROR "=========> Could NOT find LibJpeg library")
  endif (LibJpeg_FIND_REQUIRED)
endif (LibJpeg_FOUND)

mark_as_advanced(
  LibJpeg_LIBRARY
  LibJpeg_INCLUDE_DIR
)


================================================
FILE: cmake/FindMPICH2.cmake
================================================
# - Message Passing Interface (MPI) module.
#
# The Message Passing Interface (MPI) is a library used to write
# high-performance parallel applications that use message passing, and
# is typically deployed on a cluster. MPI is a standard interface
# (defined by the MPI forum) for which many implementations are
# available. All of these implementations have somewhat different
# compilation approaches (different include paths, libraries to link
# against, etc.), and this module tries to smooth out those differences.
#
# This module will set the following variables:
#   MPI_FOUND                  TRUE if we have found MPI
#   MPI_COMPILE_FLAGS          Compilation flags for MPI programs
#   MPI_INCLUDE_PATH           Include path(s) for MPI header
#   MPI_LINK_FLAGS             Linking flags for MPI programs
#   MPI_LIBRARY                First MPI library to link against (cached)
#   MPI_EXTRA_LIBRARY          Extra MPI libraries to link against (cached)
#   MPI_LIBRARIES              All libraries to link MPI programs against
#   MPIEXEC                    Executable for running MPI programs
#   MPIEXEC_NUMPROC_FLAG       Flag to pass to MPIEXEC before giving it the
#                              number of processors to run on
#   MPIEXEC_PREFLAGS           Flags to pass to MPIEXEC directly before the
#                              executable to run.
#   MPIEXEC_POSTFLAGS          Flags to pass to MPIEXEC after all other flags.
#
# This module will attempt to auto-detect these settings, first by
# looking for a MPI compiler, which many MPI implementations provide
# as a pass-through to the native compiler to simplify the compilation
# of MPI programs. The MPI compiler is stored in the cache variable
# MPI_COMPILER, and will attempt to look for commonly-named drivers
# mpic++, mpicxx, mpiCC, or mpicc. If the compiler driver is found and
# recognized, it will be used to set all of the module variables. To
# skip this auto-detection, set MPI_LIBRARY and MPI_INCLUDE_PATH in
# the CMake cache.
#
# If no compiler driver is found or the compiler driver is not
# recognized, this module will then search for common include paths
# and library names to try to detect MPI.
#
# If CMake initially finds a different MPI than was intended, and you
# want to use the MPI compiler auto-detection for a different MPI
# implementation, set MPI_COMPILER to the MPI compiler driver you want
# to use (e.g., mpicxx) and then set MPI_LIBRARY to the string
# MPI_LIBRARY-NOTFOUND. When you re-configure, auto-detection of MPI
# will run again with the newly-specified MPI_COMPILER.
#
# When using MPIEXEC to execute MPI applications, you should typically
# use all of the MPIEXEC flags as follows:
#   ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} PROCS ${MPIEXEC_PREFLAGS} EXECUTABLE
#     ${MPIEXEC_POSTFLAGS} ARGS
# where PROCS is the number of processors on which to execute the program,
# EXECUTABLE is the MPI program, and ARGS are the arguments to pass to the
# MPI program.

# Try to find the MPI driver program
find_program(MPI_COMPILER
  NAMES mpicxx
  DOC "MPI compiler. Used only to detect MPI compilation flags.")
mark_as_advanced(MPI_COMPILER)

find_program(MPIEXEC
  NAMES mpiexec
  DOC "Executable for running MPI programs.")

set(MPIEXEC_NUMPROC_FLAG "-np" CACHE STRING "Flag used by MPI to specify the number of processes for MPIEXEC; the next option will be the number of processes.")
set(MPIEXEC_PREFLAGS "" CACHE STRING "These flags will be directly before the executable that is being run by MPIEXEC.")
set(MPIEXEC_POSTFLAGS "" CACHE STRING "These flags will come after all flags given to MPIEXEC.")
set(MPIEXEC_MAX_NUMPROCS "2" CACHE STRING "Maximum number of processors available to run MPI applications.")
mark_as_advanced(MPIEXEC MPIEXEC_NUMPROC_FLAG MPIEXEC_PREFLAGS MPIEXEC_POSTFLAGS MPIEXEC_MAX_NUMPROCS)

if (MPI_INCLUDE_PATH AND MPI_LIBRARY)
  # Do nothing: we already have MPI_INCLUDE_PATH and MPI_LIBRARY in
  # the cache, and we don't want to override those settings.
elseif (MPI_COMPILER)
  # Check whether the -showme:compile option works. This indicates
  # that we have either Open MPI or a newer version of LAM-MPI, and
  # implies that -showme:link will also work.
  exec_program(${MPI_COMPILER}
    ARGS -showme:compile
    OUTPUT_VARIABLE MPI_COMPILE_CMDLINE
    RETURN_VALUE MPI_COMPILER_RETURN)

  if (MPI_COMPILER_RETURN EQUAL 0)
    # If we appear to have -showme:compile, then we should also have
    # -showme:link. Try it.
    exec_program(${MPI_COMPILER}
      ARGS -showme:link
      OUTPUT_VARIABLE MPI_LINK_CMDLINE
      RETURN_VALUE MPI_COMPILER_RETURN)

    # Note that we probably have -showme:incdirs and -showme:libdirs
    # as well.
    set(MPI_COMPILER_MAY_HAVE_INCLIBDIRS TRUE)
  endif (MPI_COMPILER_RETURN EQUAL 0)

  if (MPI_COMPILER_RETURN EQUAL 0)
    # Do nothing: we have our command lines now
  else (MPI_COMPILER_RETURN EQUAL 0)
    # Older versions of LAM-MPI have "-showme". Try it.
    exec_program(${MPI_COMPILER}
      ARGS -showme
      OUTPUT_VARIABLE MPI_COMPILE_CMDLINE
      RETURN_VALUE MPI_COMPILER_RETURN)
  endif (MPI_COMPILER_RETURN EQUAL 0)

  if (MPI_COMPILER_RETURN EQUAL 0)
    # Do nothing: we have our command lines now
  else (MPI_COMPILER_RETURN EQUAL 0)
    # MPICH uses "-show". Try it.
    exec_program(${MPI_COMPILER}
      ARGS -show
      OUTPUT_VARIABLE MPI_COMPILE_CMDLINE
      RETURN_VALUE MPI_COMPILER_RETURN)
  endif (MPI_COMPILER_RETURN EQUAL 0)

  if (MPI_COMPILER_RETURN EQUAL 0)
    # We have our command lines, but we might need to copy
    # MPI_COMPILE_CMDLINE into MPI_LINK_CMDLINE, if the underlying
    if (NOT MPI_LINK_CMDLINE)
      SET(MPI_LINK_CMDLINE ${MPI_COMPILE_CMDLINE})
    endif (NOT MPI_LINK_CMDLINE)
  else (MPI_COMPILER_RETURN EQUAL 0)
    message(STATUS "Unable to determine MPI from MPI driver ${MPI_COMPILER}")
  endif (MPI_COMPILER_RETURN EQUAL 0)
endif (MPI_INCLUDE_PATH AND MPI_LIBRARY)

if (MPI_INCLUDE_PATH AND MPI_LIBRARY)
  # Do nothing: we already have MPI_INCLUDE_PATH and MPI_LIBRARY in
  # the cache, and we don't want to override those settings.
elseif (MPI_COMPILE_CMDLINE)
  # Extract compile flags from the compile command line.
  string(REGEX MATCHALL "-D([^\" ]+|\"[^\"]+\")" MPI_ALL_COMPILE_FLAGS "${MPI_COMPILE_CMDLINE}")
  set(MPI_COMPILE_FLAGS_WORK)
  foreach(FLAG ${MPI_ALL_COMPILE_FLAGS})
    if (MPI_COMPILE_FLAGS_WORK)
      set(MPI_COMPILE_FLAGS_WORK "${MPI_COMPILE_FLAGS_WORK} ${FLAG}")
    else(MPI_COMPILE_FLAGS_WORK)
      set(MPI_COMPILE_FLAGS_WORK ${FLAG})
    endif(MPI_COMPILE_FLAGS_WORK)
  endforeach(FLAG)

  # Extract include paths from compile command line
  string(REGEX MATCHALL "-I([^\" ]+|\"[^\"]+\")" MPI_ALL_INCLUDE_PATHS "${MPI_COMPILE_CMDLINE}")
  set(MPI_INCLUDE_PATH_WORK)
  foreach(IPATH ${MPI_ALL_INCLUDE_PATHS})
    string(REGEX REPLACE "^-I" "" IPATH ${IPATH})
    string(REGEX REPLACE "//" "/" IPATH ${IPATH})
    list(APPEND MPI_INCLUDE_PATH_WORK ${IPATH})
  endforeach(IPATH)

  if (NOT MPI_INCLUDE_PATH_WORK)
    if (MPI_COMPILER_MAY_HAVE_INCLIBDIRS)
      # The compile command line didn't have any include paths on it,
      # but we may have -showme:incdirs. Use it.
      exec_program(${MPI_COMPILER}
        ARGS -showme:incdirs
        OUTPUT_VARIABLE MPI_INCLUDE_PATH_WORK
        RETURN_VALUE MPI_COMPILER_RETURN)
      separate_arguments(MPI_INCLUDE_PATH_WORK)
    endif (MPI_COMPILER_MAY_HAVE_INCLIBDIRS)
  endif (NOT MPI_INCLUDE_PATH_WORK)

  if (NOT MPI_INCLUDE_PATH_WORK)
    # If all else fails, just search for mpi.h in the normal include
    # paths.
    find_path(MPI_INCLUDE_PATH mpi.h)
    set(MPI_INCLUDE_PATH_WORK ${MPI_INCLUDE_PATH})
  endif (NOT MPI_INCLUDE_PATH_WORK)

  # Extract linker paths from the link command line
  string(REGEX MATCHALL "-L([^\" ]+|\"[^\"]+\")" MPI_ALL_LINK_PATHS "${MPI_LINK_CMDLINE}")
  set(MPI_LINK_PATH)
  foreach(LPATH ${MPI_ALL_LINK_PATHS})
    string(REGEX REPLACE "^-L" "" LPATH ${LPATH})
    string(REGEX REPLACE "//" "/" LPATH ${LPATH})
    list(APPEND MPI_LINK_PATH ${LPATH})
  endforeach(LPATH)

  if (NOT MPI_LINK_PATH)
    if (MPI_COMPILER_MAY_HAVE_INCLIBDIRS)
      # The compile command line didn't have any linking paths on it,
      # but we may have -showme:libdirs. Use it.
      exec_program(${MPI_COMPILER}
        ARGS -showme:libdirs
        OUTPUT_VARIABLE MPI_LINK_PATH
        RETURN_VALUE MPI_COMPILER_RETURN)
      separate_arguments(MPI_LINK_PATH)
    endif (MPI_COMPILER_MAY_HAVE_INCLIBDIRS)
  endif (NOT MPI_LINK_PATH)

  # Extract linker flags from the link command line
  string(REGEX MATCHALL "-Wl,([^\" ]+|\"[^\"]+\")" MPI_ALL_LINK_FLAGS "${MPI_LINK_CMDLINE}")
  set(MPI_LINK_FLAGS_WORK)
  foreach(FLAG ${MPI_ALL_LINK_FLAGS})
    if (MPI_LINK_FLAGS_WORK)
      set(MPI_LINK_FLAGS_WORK "${MPI_LINK_FLAGS_WORK} ${FLAG}")
    else(MPI_LINK_FLAGS_WORK)
      set(MPI_LINK_FLAGS_WORK ${FLAG})
    endif(MPI_LINK_FLAGS_WORK)
  endforeach(FLAG)

  # Extract the set of libraries to link against from the link command
  # line
  string(REGEX MATCHALL "-l([^\" ]+|\"[^\"]+\")" MPI_LIBNAMES "${MPI_LINK_CMDLINE}")
  message(STATUS "MPI libnames: ${MPI_LIBNAMES}")

  # Determine full path names for all of the libraries that one needs
  # to link against in an MPI program
  set(MPI_LIBRARIES)
  foreach(LIB ${MPI_LIBNAMES})
    string(REGEX REPLACE "^-l" "" LIB ${LIB})
    # Reset the scratch variable each iteration so find_library re-searches.
    set(MPI_LIB "MPI_LIB-NOTFOUND" CACHE FILEPATH "Cleared" FORCE)
    find_library(MPI_LIB ${LIB} HINTS ${MPI_LINK_PATH})
    if (MPI_LIB)
      list(APPEND MPI_LIBRARIES ${MPI_LIB})
    else (MPI_LIB)
      # message(STATUS "Unable to find MPI library ${LIB}")
    endif (MPI_LIB)
  endforeach(LIB)
  set(MPI_LIB "MPI_LIB-NOTFOUND" CACHE INTERNAL "Scratch variable for MPI detection" FORCE)
  message(STATUS "MPI libraries: ${MPI_LIBRARIES}")

  # Chop MPI_LIBRARIES into the old-style MPI_LIBRARY and
  # MPI_EXTRA_LIBRARY.
  list(LENGTH MPI_LIBRARIES MPI_NUMLIBS)
  list(LENGTH MPI_LIBNAMES MPI_NUMLIBS_EXPECTED)
  # if (MPI_NUMLIBS EQUAL MPI_NUMLIBS_EXPECTED)
  list(GET MPI_LIBRARIES 0 MPI_LIBRARY_WORK)
  set(MPI_LIBRARY ${MPI_LIBRARY_WORK} CACHE FILEPATH "MPI library to link against" FORCE)
  # else (MPI_NUMLIBS EQUAL MPI_NUMLIBS_EXPECTED)
  #   set(MPI_LIBRARY "MPI_LIBRARY-NOTFOUND" CACHE FILEPATH "MPI library to link against" FORCE)
  # endif (MPI_NUMLIBS EQUAL MPI_NUMLIBS_EXPECTED)
  if (MPI_NUMLIBS GREATER 1)
    set(MPI_EXTRA_LIBRARY_WORK ${MPI_LIBRARIES})
    list(REMOVE_AT MPI_EXTRA_LIBRARY_WORK 0)
    set(MPI_EXTRA_LIBRARY ${MPI_EXTRA_LIBRARY_WORK} CACHE STRING "Extra MPI libraries to link against" FORCE)
  else (MPI_NUMLIBS GREATER 1)
    set(MPI_EXTRA_LIBRARY "MPI_EXTRA_LIBRARY-NOTFOUND" CACHE STRING "Extra MPI libraries to link against" FORCE)
  endif (MPI_NUMLIBS GREATER 1)

  # Set up all of the appropriate cache entries
  set(MPI_COMPILE_FLAGS ${MPI_COMPILE_FLAGS_WORK} CACHE STRING "MPI compilation flags" FORCE)
  set(MPI_INCLUDE_PATH ${MPI_INCLUDE_PATH_WORK} CACHE STRING "MPI include path" FORCE)
  set(MPI_LINK_FLAGS ${MPI_LINK_FLAGS_WORK} CACHE STRING "MPI linking flags" FORCE)
else (MPI_COMPILE_CMDLINE)
  find_path(MPI_INCLUDE_PATH mpi.h
    /usr/local/include
    /usr/include
    /usr/include/mpi
    /usr/include/mpich2
    /usr/local/mpi/include
    "C:/Program Files/MPICH/SDK/Include"
    "$ENV{SystemDrive}/Program Files/MPICH2/include"
    "$ENV{SystemDrive}/Program Files/Microsoft Compute Cluster Pack/Include"
    )

  # Decide between 32-bit and 64-bit libraries for Microsoft's MPI
  if (CMAKE_CL_64)
    set(MS_MPI_ARCH_DIR amd64)
  else (CMAKE_CL_64)
    set(MS_MPI_ARCH_DIR i386)
  endif (CMAKE_CL_64)

  find_library(MPI_LIBRARY
    NAMES mpi mpich msmpi
    PATHS /usr/lib /usr/local/lib /usr/local/mpi/lib
    "C:/Program Files/MPICH/SDK/Lib"
    "$ENV{SystemDrive}/Program Files/MPICH/SDK/Lib"
    "$ENV{SystemDrive}/Program Files/Microsoft Compute Cluster Pack/Lib/${MS_MPI_ARCH_DIR}"
    )
  find_library(MPI_LIBRARY
    NAMES mpich2
    PATHS "$ENV{SystemDrive}/Program Files/MPICH2/Lib")

  find_library(MPI_EXTRA_LIBRARY
    NAMES mpi++
    PATHS /usr/lib /usr/local/lib /usr/local/mpi/lib
    "C:/Program Files/MPICH/SDK/Lib"
    DOC "Extra MPI libraries to link against.")

  set(MPI_COMPILE_FLAGS "" CACHE STRING "MPI compilation flags")
  set(MPI_LINK_FLAGS "" CACHE STRING "MPI linking flags")
endif (MPI_INCLUDE_PATH AND MPI_LIBRARY)

# on BlueGene/L the MPI lib is named libmpich.rts.a, there also these additional libs are required
if("${MPI_LIBRARY}" MATCHES "mpich.rts")
  set(MPI_EXTRA_LIBRARY ${MPI_EXTRA_LIBRARY} msglayer.rts devices.rts rts.rts devices.rts)
  set(MPI_LIBRARY ${MPI_LIBRARY} msglayer.rts devices.rts rts.rts devices.rts)
endif("${MPI_LIBRARY}" MATCHES "mpich.rts")

# Set up extra variables to conform to
if (MPI_EXTRA_LIBRARY)
  set(MPI_LIBRARIES ${MPI_LIBRARY} ${MPI_EXTRA_LIBRARY})
else (MPI_EXTRA_LIBRARY)
  set(MPI_LIBRARIES ${MPI_LIBRARY})
endif (MPI_EXTRA_LIBRARY)

if (MPI_INCLUDE_PATH AND MPI_LIBRARY)
  set(MPI_FOUND TRUE)
else (MPI_INCLUDE_PATH AND MPI_LIBRARY)
  set(MPI_FOUND FALSE)
endif (MPI_INCLUDE_PATH AND MPI_LIBRARY)

include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments
find_package_handle_standard_args(MPI DEFAULT_MSG MPI_LIBRARY MPI_INCLUDE_PATH)

mark_as_advanced(MPI_INCLUDE_PATH MPI_COMPILE_FLAGS MPI_LINK_FLAGS MPI_LIBRARY MPI_EXTRA_LIBRARY)


================================================
FILE: cmake/FindMatlab.cmake
================================================
# - this module looks for Matlab
# Defines:
#  MATLAB_INCLUDE_DIR: include path for mex.h, engine.h
#  MATLAB_LIBRARIES:   required libraries: libmex, etc
#  MATLAB_MAT_LIBRARY: path to libmat.lib
#  MATLAB_MEX_LIBRARY: path to libmex.lib
#  MATLAB_MX_LIBRARY:  path to libmx.lib
#  MATLAB_ENG_LIBRARY: path to libeng.lib

SET(MATLAB_FOUND 0)
IF(WIN32)
  IF(${CMAKE_GENERATOR} MATCHES "Visual Studio .*" OR ${CMAKE_GENERATOR} MATCHES "NMake Makefiles")
    SET(MATLAB_ROOT "[HKEY_LOCAL_MACHINE\\SOFTWARE\\MathWorks\\MATLAB\\7.0;MATLABROOT]/extern/lib/win32/microsoft/")
ELSE(${CMAKE_GENERATOR} MATCHES "Visual Studio .*" OR ${CMAKE_GENERATOR} MATCHES "NMake Makefiles") IF(${CMAKE_GENERATOR} MATCHES "Borland") # Same here, there are also: bcc50 and bcc51 directories SET(MATLAB_ROOT "[HKEY_LOCAL_MACHINE\\SOFTWARE\\MathWorks\\MATLAB\\7.0;MATLABROOT]/extern/lib/win32/microsoft/bcc54") ELSE(${CMAKE_GENERATOR} MATCHES "Borland") MESSAGE(FATAL_ERROR "Generator not compatible: ${CMAKE_GENERATOR}") ENDIF(${CMAKE_GENERATOR} MATCHES "Borland") ENDIF(${CMAKE_GENERATOR} MATCHES "Visual Studio .*" OR ${CMAKE_GENERATOR} MATCHES "NMake Makefiles") FIND_LIBRARY(MATLAB_MEX_LIBRARY libmex ${MATLAB_ROOT} ) FIND_LIBRARY(MATLAB_MX_LIBRARY libmx ${MATLAB_ROOT} ) FIND_LIBRARY(MATLAB_ENG_LIBRARY libeng ${MATLAB_ROOT} ) FIND_LIBRARY(MATLAB_MAT_LIBRARY libmat ${MATLAB_ROOT} ) FIND_PATH(MATLAB_INCLUDE_DIR "mex.h" "[HKEY_LOCAL_MACHINE\\SOFTWARE\\MathWorks\\MATLAB\\7.0;MATLABROOT]/extern/include" ) ELSE( WIN32 ) IF(NOT MATLAB_ROOT) IF($ENV{MATLAB_ROOT}) SET(MATLAB_ROOT $ENV{MATLAB_ROOT}) ELSE($ENV{MATLAB_ROOT}) SET(MATLAB_ROOT /opt/matlab) ENDIF($ENV{MATLAB_ROOT}) ENDIF(NOT MATLAB_ROOT) IF(CMAKE_SIZEOF_VOID_P EQUAL 4) # Regular x86 SET(MATLAB_SYS ${MATLAB_ROOT}/bin/glnx86 ) ELSE(CMAKE_SIZEOF_VOID_P EQUAL 4) # AMD64: SET(MATLAB_SYS ${MATLAB_ROOT}/bin/glnxa64 ) ENDIF(CMAKE_SIZEOF_VOID_P EQUAL 4) FIND_LIBRARY(MATLAB_MEX_LIBRARY mex ${MATLAB_SYS} ) FIND_LIBRARY(MATLAB_MX_LIBRARY mx ${MATLAB_SYS} ) FIND_LIBRARY(MATLAB_MAT_LIBRARY mat ${MATLAB_SYS} ) FIND_LIBRARY(MATLAB_ENG_LIBRARY eng ${MATLAB_SYS} ) FIND_PATH(MATLAB_INCLUDE_DIR "mex.h" ${MATLAB_ROOT}/extern/include ) ENDIF(WIN32) # This is common to UNIX and Win32: SET(MATLAB_LIBRARIES ${MATLAB_MEX_LIBRARY} ${MATLAB_MX_LIBRARY} ${MATLAB_ENG_LIBRARY} ) IF(MATLAB_INCLUDE_DIR AND MATLAB_MEX_LIBRARY AND MATLAB_MAT_LIBRARY AND MATLAB_ENG_LIBRARY AND MATLAB_MX_LIBRARY) SET(MATLAB_LIBRARIES ${MATLAB_MX_LIBRARY} ${MATLAB_MEX_LIBRARY} ${MATLAB_ENG_LIBRARY} ${MATLAB_MAT_LIBRARY}) ENDIF(MATLAB_INCLUDE_DIR AND 
MATLAB_MEX_LIBRARY AND MATLAB_MAT_LIBRARY AND MATLAB_ENG_LIBRARY AND MATLAB_MX_LIBRARY) MARK_AS_ADVANCED( MATLAB_MEX_LIBRARY MATLAB_MX_LIBRARY MATLAB_ENG_LIBRARY MATLAB_INCLUDE_DIR MATLAB_ROOT ) INCLUDE(FindPackageHandleStandardArgs) FIND_PACKAGE_HANDLE_STANDARD_ARGS(Matlab MATLAB_INCLUDE_DIR MATLAB_MEX_LIBRARY MATLAB_MAT_LIBRARY MATLAB_ENG_LIBRARY MATLAB_MX_LIBRARY ) ================================================ FILE: cmake/FindPerftools.cmake ================================================ ## Obtained from: # # http://tracker.icir.org/bro/browser/branches/matthias/vast/cmake/FindPerftools.cmake?order=date # # # Copyright (c) 2009, International Computer Science Institute All # rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials # provided with the distribution. # # * Neither the name of the International Computer Science # Institute the names of its contributors may be used to endorse # or promote products derived from this software without # specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY International Computer Science # Institute ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. 
# IN NO EVENT SHALL International Computer Science
# Institute BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.

# - Try to find Google perftools include dirs and libraries
#
# Usage of this module as follows:
#
#     find_package(Perftools)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
#  PERFTOOLS_ROOT                 Preferred installation prefix for searching for
#                                 Perftools, set this if the module has problems
#                                 finding the proper installation path.
#  PERFTOOLS_INCLUDEDIR           Set this to the include directory of the Google
#                                 perftools, if the module has problems finding the
#                                 installation path.
#  PERFTOOLS_LIBRARYDIR           Set this to the library directory of the Google
#                                 perftools if the module has problems finding the
#                                 proper installation path.
#
# Variables defined by this module:
#
#  Perftools_FOUND                System has Google perftools, this means the
#                                 include dir and all the libraries were found.
#  Perftools_INCLUDE_DIRS         Google perftools include directories.
#  Perftools_LIBRARIES            Link these to use the Google perftools libraries.
#
#  Perftools_TCMALLOC_LIBRARY     Path to the tcmalloc library.
#  Perftools_STACKTRACE_LIBRARY   Path to the stacktrace library.
#  Perftools_PROFILER_LIBRARY     Path to the profiler library.

if (PERFTOOLS_ROOT)
  set(Perftools_ADDITIONAL_INCLUDE_SEARCH_DIRS ${PERFTOOLS_ROOT}/include)
  set(Perftools_ADDITIONAL_LIBRARY_SEARCH_DIRS ${PERFTOOLS_ROOT}/lib)
endif ()

# BUGFIX: these two override branches previously assigned from
# ${PERFTOOLS_ROOT}/include and ${PERFTOOLS_ROOT}/lib, so the documented
# PERFTOOLS_INCLUDEDIR / PERFTOOLS_LIBRARYDIR variables had no effect.
if (PERFTOOLS_INCLUDEDIR)
  set(Perftools_ADDITIONAL_INCLUDE_SEARCH_DIRS ${PERFTOOLS_INCLUDEDIR})
endif ()

if (PERFTOOLS_LIBRARYDIR)
  set(Perftools_ADDITIONAL_LIBRARY_SEARCH_DIRS ${PERFTOOLS_LIBRARYDIR})
endif ()

if (Perftools_LIBRARIES AND Perftools_INCLUDE_DIRS)
  # In cache already.
  set(Perftools_FOUND true)
else ()
  find_path(Perftools_INCLUDE_DIRS
    NAMES google/heap-profiler.h
    PATHS ${Perftools_ADDITIONAL_INCLUDE_SEARCH_DIRS}
    /usr/local/include
    /opt/local/include
    /sw/include
    /usr/include
    )

  # tcmalloc
  set(tcmalloc_names ${tcmalloc_names} tcmalloc)
  find_library(perftools_tcmalloc_library
    NAMES ${tcmalloc_names}
    PATHS ${Perftools_ADDITIONAL_LIBRARY_SEARCH_DIRS}
    /usr/local/lib
    /opt/local/lib
    /sw/lib
    /usr/lib
    )

  # tcmalloc plus the headers is the minimum requirement for "found".
  if (perftools_tcmalloc_library AND Perftools_INCLUDE_DIRS)
    set(Perftools_TCMALLOC_LIBRARY ${perftools_tcmalloc_library})
    set(Perftools_LIBRARIES ${Perftools_LIBRARIES} ${perftools_tcmalloc_library})
    set(Perftools_FOUND true)
  else ()
    set(Perftools_FOUND false)
  endif ()

  # stacktrace (optional)
  set(stacktrace_names ${stacktrace_names} stacktrace)
  find_library(perftools_stacktrace_library
    NAMES ${stacktrace_names}
    PATHS ${Perftools_ADDITIONAL_LIBRARY_SEARCH_DIRS}
    /usr/local/lib
    /opt/local/lib
    /sw/lib
    /usr/lib
    )
  if (perftools_stacktrace_library AND Perftools_INCLUDE_DIRS)
    set(Perftools_STACKTRACE_LIBRARY ${perftools_stacktrace_library})
    set(Perftools_LIBRARIES ${Perftools_LIBRARIES} ${perftools_stacktrace_library})
  endif ()

  # profiler (optional)
  set(profiler_names ${profiler_names} profiler)
  find_library(perftools_profiler_library
    NAMES ${profiler_names}
    PATHS ${Perftools_ADDITIONAL_LIBRARY_SEARCH_DIRS}
    /usr/local/lib
    /opt/local/lib
    /sw/lib
    /usr/lib
    )
  if (perftools_profiler_library AND Perftools_INCLUDE_DIRS)
    set(Perftools_PROFILER_LIBRARY ${perftools_profiler_library})
    set(Perftools_LIBRARIES ${Perftools_LIBRARIES} ${perftools_profiler_library})
  endif ()

  if (Perftools_FOUND)
    if (NOT Perftools_FIND_QUIETLY)
      message(STATUS "Found Google perftools")
    endif ()
  else ()
    if (Perftools_FIND_REQUIRED)
      message(FATAL_ERROR "Could not find Google perftools")
    endif ()
  endif ()

  mark_as_advanced(
    Perftools_INCLUDE_DIRS
    Perftools_LIBRARIES
    Perftools_TCMALLOC_LIBRARY
    Perftools_STACKTRACE_LIBRARY
    Perftools_PROFILER_LIBRARY
    )
endif()


================================================
FILE: cmake/Mex_stub.cpp
================================================
// Adopted from: http://www.cmake.org/Wiki/images/7/72/Mex_stub.cpp
// and tutorial: http://www.cmake.org/Wiki/CMake:MatlabMex
// on June 9, 2010 (akyrola)
#include "mex.h"

// Real entry points are provided by the statically linked library.
extern void __mexFunction__(int nlhs, mxArray *plhs[],
                            int nrhs, const mxArray *prhs[]);
extern void __at_exit__();

static void at_exit();

// MATLAB-visible entry point: register the exit hook, then forward.
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
  mexAtExit(&at_exit);
  __mexFunction__(nlhs, plhs, nrhs, prhs);
}

static void at_exit() {
  __at_exit__();
}


================================================
FILE: cmake/mex_link.sh
================================================
#!/bin/sh
OUTPUT=$1
STATIC_LIB_NAME=$OUTPUT
MOVE_LOCATION=$2
BASEDIR=$3
LINKFILES=$4
# BUGFIX: the original line read
#   LINKER_FLAGS= -shared -Wl,...
# which (per POSIX shell semantics) assigns an EMPTY value to LINKER_FLAGS
# and then tries to execute "-shared" as a command. The flags must be a
# single quoted assignment.
LINKER_FLAGS="-shared -Wl,--version-script,/afs/cs.cmu.edu/misc/matlab/amd64_f7/7.9/lib/matlab7/extern/lib/glnxa64/mexFunction.map -Wl,--no-undefined"

echo $OUTPUT
echo $STATIC_LIB_NAME

echo 'Running godawful mex linking hack...'
# mex_stub.o compiled with: g++ -g -Wall -fPIC -ansi -D_GNU_SOURCE -fPIC -fno-omit-frame-pointer -pthread -DMATLAB_MEX_FILE -lmx -lmex -lmat -lm -I/afs/cs.cmu.edu/local/matlab/amd64_f7/7.9/lib/matlab7/extern/include -c ${BASEDIR}/cmake/Mex_stub.cpp -o mex_stub.o

mex -g -cxx CC='gcc' CXX='g++' LD='g++'  -L./ -lglib-2.0 -l$STATIC_LIB_NAME $LINKER_FLAGS -output $OUTPUT mex_stub.o $LINKFILES

#mv $OUTPUT.mexa64 $MOVE_LOCATION
#-lpthread -lgthread-2.0 -lrt  -DMX_COMPAT_32


================================================
FILE: configure
================================================
#!/bin/bash

##=============================================================================
## Support code

# download_file <url> <dest>: fetch a URL with wget, falling back to curl;
# exits the whole script if neither tool is available.
function download_file {
  # detect wget
  echo "Downloading $2 from $1 ..."
  if [ -z `which wget` ] ; then
    if [ -z `which curl` ] ; then
      echo "Unable to find either curl or wget! Cannot proceed with automatic install."
      exit 1
    fi
    curl $1 -o $2
  else
    wget $1 -O $2
  fi
} # end of download file

# Print usage and exit.
function print_help {
  echo "Usage: ./configure [--prefix=PREFIX] [--experimental]"
  echo
  echo "  --cleanup           remove all build directories"
  echo
  echo "  --prefix=[PREFIX]   GraphLab Installation target directory. Defaults to /usr/local"
  echo
  echo "  --ide=[Xcode]       Specify the ide to use when building GraphLab."
  echo
  echo "  --no_openmp         Disables OpenMP. Disabled by default on Mac."
  echo
  echo "  --no_mpi            Disables MPI (disables distributed execution)."
  echo
  echo "  --no_jvm            Disable JVM features including HDFS integration."
  echo
  echo "  --no_tcmalloc       Disable using tcmalloc instead of malloc."
  echo
  echo "  --experimental      Turns on undocumented experimental capabilities. "
  echo
  echo "  --c++11             Turns on C++11 experimental features. "
  echo
  echo "  --vid32             Switch to 32bit vertex ids."
  echo
  echo "  -D var=value        Specify definitions to be passed on to cmake."
  exit 1
} # end of print help

# Interactively confirm, then delete every build directory and the cached
# dependency state.
function run_cleanup {
  #!/bin/bash
  echo "This script completely erases all build folders including dependencies!!!"
  echo "Are you sure you want to continue? (yes or no)"
  read yesorno;
  if [ "$yesorno" == "yes" ]; then
    echo "Removing release and debug folders";
    rm -rf release debug deps configure.deps
  else
    echo "Doing nothing!";
  fi
  exit 1
} # end of run cleanup

function unknown_option {
  echo "Unrecognized option: $1"
  echo "To get help, run ./configure --help"
  exit 1
} # end of unknown option

## this function is broken
# function check_version {
#     local version=$1 check=$2
#     local winner=$(echo -e "$version\n$check" | sed '/^$/d' | sort -nr | head -1)
#     [[ "$winner" = "$version" ]] && return 0
#     return 1
# } # end of check version

## Obtained from forum:
## http://stackoverflow.com/questions/4023830/bash-how-compare-two-strings-in-version-format
# Return 0 if version are equal
# Returns 1 if version 1 is larger
# Returns 2 if version 2 is larger
function check_version {
  if [[ $1 == $2 ]]
  then
    return 0
  fi
  local IFS=.
  local i ver1=($1) ver2=($2)
  # fill empty fields in ver1 with zeros
  for ((i=${#ver1[@]}; i<${#ver2[@]}; i++))
  do
    ver1[i]=0
  done
  for ((i=0; i<${#ver1[@]}; i++))
  do
    if [[ -z ${ver2[i]} ]]
    then
      # fill empty fields in ver2 with zeros
      ver2[i]=0
    fi
    if ((10#${ver1[i]} > 10#${ver2[i]}))
    then
      return 1
    fi
    if ((10#${ver1[i]} < 10#${ver2[i]}))
    then
      return 2
    fi
  done
  return 0
}

##=============================================================================
## Main configuration processing

## Define some defaults which are modified by the script and whatever
## is defined in configure.deps
RELEASE_DIR=release
DEBUG_DIR=debug
INSTALL_DIR=/usr/local
EXPERIMENTAL=false
GRAPHLAB_HOME=$PWD
DEPS_PREFIX=$PWD/deps/local
NO_OPENMP=false
NO_MPI=false
NO_TCMALLOC=false
CPP11=false
VID32=false
CFLAGS=""

# if mac detected, force no_openmp flags by default
if [[ $OSTYPE == darwin* ]]; then
  NO_OPENMP=true
fi

## The defaults can be overwritten be editing the configure.deps file
if [ -f configure.deps ]; then
  # source configure.deps
  # We delete the configure deps and then recreate it each time using
  # the original values along with any modifications made by the
  # configure logic.
  rm configure.deps
fi

# Parse command line configure flags ------------------------------------------
while [ $# -gt 0 ]
do case $1 in
    --help)              print_help=1 ;;
    --cleanup)           run_cleanup=1 ;;
    --no_openmp)         no_openmp=1 ;;
    --no_mpi)            no_mpi=1 ;;
    --no_tcmalloc)       no_tcmalloc=1 ;;
    --no_jvm)            no_jvm=1 ;;
    --experimental)      experimental=1 ;;
    --c++11)             cpp11=1 ;;
    --vid32)             vid32=1 ;;
    --prefix=*)          prefix=${1##--prefix=} ;;
    --ide=*)             ide=${1##--ide=} ;;
    -D)                  CFLAGS="$CFLAGS -D $2"; shift ;;
    *) unknown_option $1 ;;
  esac
  shift
done

if [ $print_help ]; then
  print_help;
fi

if [ $run_cleanup ]; then
  run_cleanup
fi

# Extra generator setting (passed in as an argument)
if [[ -n $ide ]]; then
  GENERATOR="-G $ide"
fi

if [ $no_openmp ]; then
  NO_OPENMP=true
fi

if [ $no_mpi ]; then
  NO_MPI=true
fi

if [ $no_tcmalloc ]; then
  NO_TCMALLOC=true
fi

if [ $experimental ]; then
  EXPERIMENTAL=true
fi

if [ $cpp11 ]; then
  CPP11=true
fi

if [ $vid32 ]; then
  VID32=true
fi

if [[ -n $prefix ]]; then
  INSTALL_DIR=$prefix
fi

# If not specified we assume gcc and g++ are the default c and c++
# compilers
if [[ -z $CC ]]; then
  CC=gcc
fi
if [[ -z $CXX ]]; then
  CXX=g++
fi

# Test java
if [[ -z $JAVAC ]]; then
  JAVAC=javac
fi
# set -e
# set -o pipefail
if ! $JAVAC -version >> /dev/null; then
  JAVAC=""
fi

if [[ -n $no_jvm ]]; then
  echo "JVM disabled"
  JAVAC=""
fi

## Begin logging in config.log
LOG_FILE=config.log
date | tee $LOG_FILE

## ===================================================================
## Setup CMake
# Automatically detect and install a sufficiently new version of
# cmake

## Install cmake
if [ `which cmake` ]; then
  #test cmake version
  echo "Testing existing cmake version..."
  currentversion=`cmake --version | awk -F "patch" '{print $1;}' | tr -dc '[0-9].'`
  echo "Detected $currentversion . Required 2.8.3"
  check_version $currentversion "2.8.3"
  # check_version returns 2 only when the required version is newer.
  if [ $? -ne 2 ]; then
    echo "CMake version is good"
    CMAKE="cmake"
  fi
fi

# CMake not found and there is a cmake in the deps directory!
if [ -z $CMAKE ] && [ -f $DEPS_PREFIX/bin/cmake ]; then
  #test cmake version
  echo "Testing existing cmake version..."
  currentversion=`$DEPS_PREFIX/bin/cmake --version | awk -F "patch" '{print $1;}' | tr -dc '[0-9].'`
  echo "Detected ${currentversion}. Required 2.8.3"
  check_version $currentversion "2.8.3"
  if [ $? -ne 2 ]; then
    echo "CMake version is good"
    CMAKE=$DEPS_PREFIX/bin/cmake
  fi
fi

if [ -z $CMAKE ]; then
  echo "This script will now proceed to download CMake and set it up in"
  echo "the local graphlab/deps directory. The GraphLab compilation "
  echo "process will be directed to use graphlab/deps/cmake."
  pushd .
  mkdir deps
  cd deps
  # get the cmake software page
  rm -f software.html
  download_file "http://www.cmake.org/cmake/resources/software.html" software.html
  # look for the first tar.gz I can download
  cmakedownload=`grep -m 1 -o -e "href=\"http://www\\.cmake.*\\.tar\\.gz\"" software.html | grep -o -e "http.*\\.tar\\.gz"`
  if [ -z "$cmakedownload" ] ; then
    echo "Unable to locate CMake package. You will have to install it yourself."
exit 1 fi rm -f cmake.tar.gz set -e download_file $cmakedownload cmake.tar.gz tar -xzvf cmake.tar.gz # cd into the extracted directory and install cd cmake-* ./configure --prefix=$DEPS_PREFIX make -j2 make install set +e popd CMAKE=$DEPS_PREFIX/bin/cmake echo "CMAKE=$CMAKE" >> configure.deps fi ## ============================================================================ # Regenerate the configure.deps echo -e "# Release build directory:" >> configure.deps echo -e "\t RELEASE_DIR=$RELEASE_DIR" >> configure.deps echo -e "# Debug build directory (optimization disabled):" >> configure.deps echo -e "\t DEBUG_DIR=$DEBUG_DIR" >> configure.deps echo -e "# Directory in which graphlab is installed (prefix):" >> configure.deps echo -e "\t INSTALL_DIR=$INSTALL_DIR" >> configure.deps echo -e "# Is experimental (research) code enabled:" >> configure.deps echo -e "\t EXPERIMENTAL=$EXPERIMENTAL" >> configure.deps echo -e "# The graphlab home directory: " >> configure.deps echo -e "\t GRAPHLAB_HOME=$GRAPHLAB_HOME" >> configure.deps echo -e "# The directory in which graphlab installs external dependencies:" >> configure.deps echo -e "\t DEPS_PREFIX=$DEPS_PREFIX" >> configure.deps echo -e "# Use OpenMP? This can accelerate some graph building code: " >> configure.deps echo -e "\t NO_OPENMP=$NO_OPENMP" >> configure.deps echo -e "# Use MPI? Without MPI GraphLab cannot run distributed: " >> configure.deps echo -e "\t NO_MPI=$NO_MPI" >> configure.deps echo -e "# Use tcmalloc? 
Thread-Caching Malloc improves memory allocation: " >> configure.deps echo -e "\t NO_TCMALLOC=$NO_TCMALLOC" >> configure.deps echo -e "# The c compiler to use: " >> configure.deps echo -e "\t CC=$CC" >> configure.deps echo -e "# The c++ compiler to use: " >> configure.deps echo -e "\t CXX=$CXX" >> configure.deps echo -e "# Any addition user defined CFLAGS: " >> configure.deps echo -e "\t CFLAGS=$CFLAGS" >> configure.deps echo -e "# The Java compiler: " >> configure.deps echo -e "\t JAVAC=$JAVAC" >> configure.deps echo -e "# The cmake binary used to geneate the project:" >> configure.deps echo -e "\t CMAKE=$CMAKE" >> configure.deps mkdir -p deps/local/lib echo "======================= BUILD CONFIGURATION ========================" echo "System Information: " | tee -a $LOG_FILE uname -v | tee -a $LOG_FILE echo "Compiler Information: " | tee -a $LOG_FILE $CC --version | tee -a $LOG_FILE $CXX --version | tee -a $LOG_FILE $CMAKE --version | tee -a $LOG_FILE if [[ -n $JAVAC ]]; then $JAVAC -version | tee -a $LOG_FILE fi echo "======================= Config File ================================" cat configure.deps | tee -a $LOG_FILE ### Add addition config flags ================================================= CFLAGS="$CFLAGS -D NO_OPENMP:BOOL=$NO_OPENMP" CFLAGS="$CFLAGS -D NO_MPI:BOOL=$NO_MPI" CFLAGS="$CFLAGS -D NO_TCMALLOC:BOOL=$NO_TCMALLOC" CFLAGS="$CFLAGS -D CMAKE_INSTALL_PREFIX:STRING=$INSTALL_DIR" CFLAGS="$CFLAGS -D EXPERIMENTAL:BOOL=$EXPERIMENTAL" CFLAGS="$CFLAGS -D CPP11:BOOL=$CPP11" CFLAGS="$CFLAGS -D VID32:BOOL=$VID32" if [ -z $JAVAC ]; then CFLAGS="$CFLAGS -D NO_JAVAC:BOOL=1" fi ## ============================================================================ # Run Cmake set -e set -o pipefail echo -e "\n\n\n======================= Release ========================" \ | tee -a $LOG_FILE if [ ! -d $RELEASE_DIR ]; then mkdir $RELEASE_DIR fi cd $RELEASE_DIR rm -f CMakeCache.txt build_cmd="$CMAKE \ $GENERATOR \ -D CMAKE_BUILD_TYPE=Release \ $CFLAGS \ ../." 
echo $build_cmd | tee -a "../$LOG_FILE" eval $build_cmd | tee -a "../$LOG_FILE" cd $GRAPHLAB_HOME echo -e "\n\n\n======================= Debug =========================" \ | tee -a $LOG_FILE if [ ! -d $DEBUG_DIR ]; then mkdir $DEBUG_DIR fi cd $DEBUG_DIR rm -f CMakeCache.txt build_cmd="$CMAKE \ $GENERATOR \ -D CMAKE_BUILD_TYPE=Debug \ $CFLAGS \ ../." echo $build_cmd | tee -a ../$LOG_FILE eval $build_cmd | tee -a ../$LOG_FILE cd $GRAPHLAB_HOME echo "*****************************************************" echo "* Important Note *" echo "* -------------- *" echo "* You do not have to build everything! *" echo "* *" echo "* Everything takes a very long time, and a ton of *" echo "* of memory to build. You can just cd into toolkit *" echo "* directory you want, and just build that. For *" echo "* instance, if I just want the release build of *" echo "* PageRank, I could: *" echo "* *" echo "* cd release/toolkits/graph_analytics *" echo "* make -j2 *" echo "* *" echo "* Use at most [RAM in GB] parallel builds. The *" echo "* compilation consumes a lot of RAM. i.e. if you *" echo "* have 4 GB of RAM, do not do more than make -j4. *" echo "*****************************************************" ================================================ FILE: cxxtest/cxxtest/Descriptions.cpp ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __cxxtest__Descriptions_cpp__ #define __cxxtest__Descriptions_cpp__ #include namespace CxxTest { TestDescription::~TestDescription() {} SuiteDescription::~SuiteDescription() {} WorldDescription::~WorldDescription() {} // // Convert total tests to string // #ifndef _CXXTEST_FACTOR char *WorldDescription::strTotalTests( char *s ) const { numberToString( numTotalTests(), s ); return s; } #else // _CXXTEST_FACTOR char *WorldDescription::strTotalTests( char *s ) const { char *p = numberToString( numTotalTests(), s ); if ( numTotalTests() <= 1 ) return s; unsigned n = numTotalTests(); unsigned numFactors = 0; for ( unsigned factor = 2; (factor * factor) <= n; factor += (factor == 2) ? 1 : 2 ) { unsigned power; for ( power = 0; (n % factor) == 0; n /= factor ) ++ power; if ( !power ) continue; p = numberToString( factor, copyString( p, (numFactors == 0) ? " = " : " * " ) ); if ( power > 1 ) p = numberToString( power, copyString( p, "^" ) ); ++ numFactors; } if ( n > 1 ) { if ( !numFactors ) copyString( p, tracker().failedTests() ? " :(" : tracker().warnings() ? " :|" : " :)" ); else numberToString( n, copyString( p, " * " ) ); } return s; } #endif // _CXXTEST_FACTOR } #endif // __cxxtest__Descriptions_cpp__ ================================================ FILE: cxxtest/cxxtest/Descriptions.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __cxxtest__Descriptions_h__ #define __cxxtest__Descriptions_h__ // // TestDescription, SuiteDescription and WorldDescription // hold information about tests so they can be run and reported. // #include namespace CxxTest { class TestSuite; class TestDescription : public Link { public: virtual ~TestDescription(); virtual const char *file() const = 0; virtual int line() const = 0; virtual const char *testName() const = 0; virtual const char *suiteName() const = 0; virtual void run() = 0; virtual bool setUp() = 0; virtual bool tearDown() = 0; virtual const TestDescription *next() const = 0; virtual TestDescription *next() = 0; }; class SuiteDescription : public Link { public: virtual ~SuiteDescription(); virtual const char *file() const = 0; virtual int line() const = 0; virtual const char *suiteName() const = 0; virtual TestSuite *suite() const = 0; virtual unsigned numTests() const = 0; virtual const TestDescription &testDescription( unsigned /*i*/ ) const = 0; virtual TestDescription *firstTest() = 0; virtual const TestDescription *firstTest() const = 0; virtual SuiteDescription *next() = 0; virtual const SuiteDescription *next() const = 0; virtual void activateAllTests() = 0; virtual bool leaveOnly( const char * /*testName*/ ) = 0; virtual bool setUp() = 0; virtual bool tearDown() = 0; }; class WorldDescription : public Link { public: virtual ~WorldDescription(); virtual const char *worldName() const { return "cxxtest"; } virtual unsigned numSuites( void ) const = 0; virtual unsigned numTotalTests( void ) const = 0; virtual const SuiteDescription &suiteDescription( unsigned /*i*/ ) const = 0; enum { MAX_STRLEN_TOTAL_TESTS = 32 }; char *strTotalTests( char * /*buffer*/ ) const; virtual SuiteDescription *firstSuite() = 0; virtual const SuiteDescription *firstSuite() const = 0; virtual void activateAllTests() = 0; virtual bool leaveOnly( const char * /*suiteName*/, const char * 
/*testName*/ = 0 ) = 0; }; } #endif // __cxxtest__Descriptions_h__ ================================================ FILE: cxxtest/cxxtest/DummyDescriptions.cpp ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #include namespace CxxTest { DummyTestDescription::DummyTestDescription() {} const char *DummyTestDescription::file() const { return ""; } int DummyTestDescription::line() const { return 0; } const char *DummyTestDescription::testName() const { return ""; } const char *DummyTestDescription::suiteName() const { return ""; } bool DummyTestDescription::setUp() { return true;} void DummyTestDescription::run() {} bool DummyTestDescription::tearDown() { return true;} TestDescription *DummyTestDescription::next() { return 0; } const TestDescription *DummyTestDescription::next() const { return 0; } DummySuiteDescription::DummySuiteDescription() : _test() {} const char *DummySuiteDescription::file() const { return ""; } int DummySuiteDescription::line() const { return 0; } const char *DummySuiteDescription::suiteName() const { return ""; } TestSuite *DummySuiteDescription::suite() const { return 0; } unsigned DummySuiteDescription::numTests() const { return 0; } const TestDescription &DummySuiteDescription::testDescription( unsigned ) const { return _test; } SuiteDescription *DummySuiteDescription::next() { return 0; } TestDescription *DummySuiteDescription::firstTest() { return 0; } const SuiteDescription *DummySuiteDescription::next() const { return 0; } const 
TestDescription *DummySuiteDescription::firstTest() const { return 0; } void DummySuiteDescription::activateAllTests() {} bool DummySuiteDescription::leaveOnly( const char * /*testName*/ ) { return false; } bool DummySuiteDescription::setUp() { return true;} bool DummySuiteDescription::tearDown() { return true;} DummyWorldDescription::DummyWorldDescription() : _suite() {} unsigned DummyWorldDescription::numSuites( void ) const { return 0; } unsigned DummyWorldDescription::numTotalTests( void ) const { return 0; } const SuiteDescription &DummyWorldDescription::suiteDescription( unsigned ) const { return _suite; } SuiteDescription *DummyWorldDescription::firstSuite() { return 0; } const SuiteDescription *DummyWorldDescription::firstSuite() const { return 0; } void DummyWorldDescription::activateAllTests() {} bool DummyWorldDescription::leaveOnly( const char * /*suiteName*/, const char * /*testName*/ ) { return false; } bool DummyWorldDescription::setUp() { return true;} bool DummyWorldDescription::tearDown() { return true;} } ================================================ FILE: cxxtest/cxxtest/DummyDescriptions.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __cxxtest__DummyDescriptions_h__ #define __cxxtest__DummyDescriptions_h__ // // DummyTestDescription, DummySuiteDescription and DummyWorldDescription // #include namespace CxxTest { class DummyTestDescription : public TestDescription { public: DummyTestDescription(); const char *file() const; int line() const; const char *testName() const; const char *suiteName() const; bool setUp(); void run(); bool tearDown(); TestDescription *next(); const TestDescription *next() const; }; class DummySuiteDescription : public SuiteDescription { public: DummySuiteDescription(); const char *file() const; int line() const; const char *suiteName() const; TestSuite *suite() const; unsigned numTests() const; const TestDescription &testDescription( unsigned ) const; SuiteDescription *next(); TestDescription *firstTest(); const SuiteDescription *next() const; const TestDescription *firstTest() const; void activateAllTests(); bool leaveOnly( const char * /*testName*/ ); bool setUp(); bool tearDown(); private: DummyTestDescription _test; }; class DummyWorldDescription : public WorldDescription { public: DummyWorldDescription(); unsigned numSuites( void ) const; unsigned numTotalTests( void ) const; const SuiteDescription &suiteDescription( unsigned ) const; SuiteDescription *firstSuite(); const SuiteDescription *firstSuite() const; void activateAllTests(); bool leaveOnly( const char * /*suiteName*/, const char * /*testName*/ = 0 ); bool setUp(); bool tearDown(); private: DummySuiteDescription _suite; }; } #endif // __cxxtest__DummyDescriptions_h__ ================================================ FILE: cxxtest/cxxtest/ErrorFormatter.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. 
This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__ErrorFormatter_h__ #define __cxxtest__ErrorFormatter_h__ // // The ErrorFormatter is a TestListener that // prints reports of the errors to an output // stream. Since we cannot rely on the standard // iostreams, this header defines a base class // analogout to std::ostream. // #include #include #include #include #include namespace CxxTest { class OutputStream { public: virtual ~OutputStream() {} virtual void flush() {}; virtual OutputStream &operator<<( unsigned /*number*/ ) { return *this; } virtual OutputStream &operator<<( const char * /*string*/ ) { return *this; } typedef void (*Manipulator)( OutputStream & ); virtual OutputStream &operator<<( Manipulator m ) { m( *this ); return *this; } static void endl( OutputStream &o ) { (o << "\n").flush(); } }; class ErrorFormatter : public TestListener { public: ErrorFormatter( OutputStream *o, const char *preLine = ":", const char *postLine = "" ) : _dotting( true ), _reported( false ), _o(o), _preLine(preLine), _postLine(postLine) { } int run() { TestRunner::runAllTests( *this ); return tracker().failedTests(); } void enterWorld( const WorldDescription & /*desc*/ ) { (*_o) << "Running " << totalTests; _o->flush(); _dotting = true; _reported = false; } static void totalTests( OutputStream &o ) { char s[WorldDescription::MAX_STRLEN_TOTAL_TESTS]; const WorldDescription &wd = tracker().world(); o << wd.strTotalTests( s ) << (wd.numTotalTests() == 1 ? 
" test" : " tests"); } void enterSuite( const SuiteDescription & ) { _reported = false; } void enterTest( const TestDescription & ) { _reported = false; } void leaveTest( const TestDescription & ) { if ( !tracker().testFailed() ) { (*_o) << "."; _o->flush(); fflush(stdout); _dotting = true; } } void leaveWorld( const WorldDescription &desc ) { if ( !tracker().failedTests() ) { (*_o) << "OK!" << endl; return; } newLine(); (*_o) << "Failed " << tracker().failedTests() << " of " << totalTests << endl; unsigned numPassed = desc.numTotalTests() - tracker().failedTests(); (*_o) << "Success rate: " << (numPassed * 100 / desc.numTotalTests()) << "%" << endl; } void trace( const char *file, int line, const char *expression ) { stop( file, line ) << "Trace: " << expression << endl; } void warning( const char *file, int line, const char *expression ) { stop( file, line ) << "Warning: " << expression << endl; } void failedTest( const char *file, int line, const char *expression ) { stop( file, line ) << "Error: Test failed: " << expression << endl; } void failedAssert( const char *file, int line, const char *expression ) { stop( file, line ) << "Error: Assertion failed: " << expression << endl; } void failedAssertEquals( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ) { stop( file, line ) << "Error: Expected (" << xStr << " == " << yStr << "), found (" << x << " != " << y << ")" << endl; } void failedAssertSameData( const char *file, int line, const char *xStr, const char *yStr, const char *sizeStr, const void *x, const void *y, unsigned size ) { stop( file, line ) << "Error: Expected " << sizeStr << " (" << size << ") bytes to be equal at (" << xStr << ") and (" << yStr << "), found:" << endl; dump( x, size ); (*_o) << " differs from" << endl; dump( y, size ); } void failedAssertSameFiles( const char* file, int line, const char*, const char*, const char* explanation ) { stop( file, line ) << "Error: " << explanation << endl; } 
void failedAssertDelta( const char *file, int line, const char *xStr, const char *yStr, const char *dStr, const char *x, const char *y, const char *d ) { stop( file, line ) << "Error: Expected (" << xStr << " == " << yStr << ") up to " << dStr << " (" << d << "), found (" << x << " != " << y << ")" << endl; } void failedAssertDiffers( const char *file, int line, const char *xStr, const char *yStr, const char *value ) { stop( file, line ) << "Error: Expected (" << xStr << " != " << yStr << "), found (" << value << ")" << endl; } void failedAssertLessThan( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ) { stop( file, line ) << "Error: Expected (" << xStr << " < " << yStr << "), found (" << x << " >= " << y << ")" << endl; } void failedAssertLessThanEquals( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ) { stop( file, line ) << "Error: Expected (" << xStr << " <= " << yStr << "), found (" << x << " > " << y << ")" << endl; } void failedAssertRelation( const char *file, int line, const char *relation, const char *xStr, const char *yStr, const char *x, const char *y ) { stop( file, line ) << "Error: Expected " << relation << "( " << xStr << ", " << yStr << " ), found !" << relation << "( " << x << ", " << y << " )" << endl; } void failedAssertPredicate( const char *file, int line, const char *predicate, const char *xStr, const char *x ) { stop( file, line ) << "Error: Expected " << predicate << "( " << xStr << " ), found !" << predicate << "( " << x << " )" << endl; } void failedAssertThrows( const char *file, int line, const char *expression, const char *type, bool otherThrown ) { stop( file, line ) << "Error: Expected (" << expression << ") to throw (" << type << ") but it " << (otherThrown ? 
"threw something else" : "didn't throw") << endl; } void failedAssertThrowsNot( const char *file, int line, const char *expression ) { stop( file, line ) << "Error: Expected (" << expression << ") not to throw, but it did" << endl; } protected: OutputStream *outputStream() const { return _o; } private: ErrorFormatter( const ErrorFormatter & ); ErrorFormatter &operator=( const ErrorFormatter & ); OutputStream &stop( const char *file, int line ) { newLine(); reportTest(); return (*_o) << file << _preLine << line << _postLine << ": "; } void newLine( void ) { if ( _dotting ) { (*_o) << endl; _dotting = false; } } void reportTest( void ) { if( _reported ) return; (*_o) << "In " << tracker().suite().suiteName() << "::" << tracker().test().testName() << ":" << endl; _reported = true; } void dump( const void *buffer, unsigned size ) { if ( !buffer ) dumpNull(); else dumpBuffer( buffer, size ); } void dumpNull() { (*_o) << " (null)" << endl; } void dumpBuffer( const void *buffer, unsigned size ) { unsigned dumpSize = size; if ( maxDumpSize() && dumpSize > maxDumpSize() ) dumpSize = maxDumpSize(); const unsigned char *p = (const unsigned char *)buffer; (*_o) << " { "; for ( unsigned i = 0; i < dumpSize; ++ i ) (*_o) << byteToHex( *p++ ) << " "; if ( dumpSize < size ) (*_o) << "... "; (*_o) << "}" << endl; } static void endl( OutputStream &o ) { OutputStream::endl( o ); } bool _dotting; bool _reported; OutputStream *_o; const char *_preLine; const char *_postLine; }; } #endif // __cxxtest__ErrorFormatter_h__ ================================================ FILE: cxxtest/cxxtest/ErrorPrinter.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. 
Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__ErrorPrinter_h__ #define __cxxtest__ErrorPrinter_h__ // // The ErrorPrinter is a simple TestListener that // just prints "OK" if everything goes well, otherwise // reports the error in the format of compiler messages. // The ErrorPrinter uses std::cout // #include #ifndef _CXXTEST_HAVE_STD # define _CXXTEST_HAVE_STD #endif // _CXXTEST_HAVE_STD #include #include #ifdef _CXXTEST_OLD_STD # include #else // !_CXXTEST_OLD_STD # include #endif // _CXXTEST_OLD_STD namespace CxxTest { class ErrorPrinter : public ErrorFormatter { public: ErrorPrinter( CXXTEST_STD(ostream) &o = CXXTEST_STD(cout), const char *preLine = ":", const char *postLine = "" ) : ErrorFormatter( new Adapter(o), preLine, postLine ) {} virtual ~ErrorPrinter() { delete outputStream(); } private: class Adapter : public OutputStream { CXXTEST_STD(ostream) &_o; public: Adapter( CXXTEST_STD(ostream) &o ) : _o(o) {} void flush() { _o.flush(); } OutputStream &operator<<( const char *s ) { _o << s; return *this; } OutputStream &operator<<( Manipulator m ) { return OutputStream::operator<<( m ); } OutputStream &operator<<( unsigned i ) { char s[1 + 3 * sizeof(unsigned)]; numberToString( i, s ); _o << s; return *this; } }; }; } #endif // __cxxtest__ErrorPrinter_h__ ================================================ FILE: cxxtest/cxxtest/Flags.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__Flags_h__ #define __cxxtest__Flags_h__ // // These are the flags that control CxxTest // #if !defined(CXXTEST_FLAGS) # define CXXTEST_FLAGS #endif // !CXXTEST_FLAGS #if defined(CXXTEST_HAVE_EH) && !defined(_CXXTEST_HAVE_EH) # define _CXXTEST_HAVE_EH #endif // CXXTEST_HAVE_EH #if defined(CXXTEST_HAVE_STD) && !defined(_CXXTEST_HAVE_STD) # define _CXXTEST_HAVE_STD #endif // CXXTEST_HAVE_STD #if defined(CXXTEST_OLD_TEMPLATE_SYNTAX) && !defined(_CXXTEST_OLD_TEMPLATE_SYNTAX) # define _CXXTEST_OLD_TEMPLATE_SYNTAX #endif // CXXTEST_OLD_TEMPLATE_SYNTAX #if defined(CXXTEST_OLD_STD) && !defined(_CXXTEST_OLD_STD) # define _CXXTEST_OLD_STD #endif // CXXTEST_OLD_STD #if defined(CXXTEST_ABORT_TEST_ON_FAIL) && !defined(_CXXTEST_ABORT_TEST_ON_FAIL) # define _CXXTEST_ABORT_TEST_ON_FAIL #endif // CXXTEST_ABORT_TEST_ON_FAIL #if defined(CXXTEST_NO_COPY_CONST) && !defined(_CXXTEST_NO_COPY_CONST) # define _CXXTEST_NO_COPY_CONST #endif // CXXTEST_NO_COPY_CONST #if defined(CXXTEST_FACTOR) && !defined(_CXXTEST_FACTOR) # define _CXXTEST_FACTOR #endif // CXXTEST_FACTOR #if defined(CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION) && !defined(_CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION) # define _CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION #endif // CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION #if defined(CXXTEST_LONGLONG) # if defined(_CXXTEST_LONGLONG) # undef _CXXTEST_LONGLONG # endif # define _CXXTEST_LONGLONG CXXTEST_LONGLONG #endif // CXXTEST_LONGLONG #ifndef CXXTEST_MAX_DUMP_SIZE # define CXXTEST_MAX_DUMP_SIZE 0 #endif // CXXTEST_MAX_DUMP_SIZE #if defined(_CXXTEST_ABORT_TEST_ON_FAIL) && !defined(CXXTEST_DEFAULT_ABORT) # define CXXTEST_DEFAULT_ABORT true #endif // _CXXTEST_ABORT_TEST_ON_FAIL && !CXXTEST_DEFAULT_ABORT #if !defined(CXXTEST_DEFAULT_ABORT) # define CXXTEST_DEFAULT_ABORT false #endif // !CXXTEST_DEFAULT_ABORT #if defined(_CXXTEST_ABORT_TEST_ON_FAIL) 
&& !defined(_CXXTEST_HAVE_EH) # warning "CXXTEST_ABORT_TEST_ON_FAIL is meaningless without CXXTEST_HAVE_EH" # undef _CXXTEST_ABORT_TEST_ON_FAIL #endif // _CXXTEST_ABORT_TEST_ON_FAIL && !_CXXTEST_HAVE_EH // // Some minimal per-compiler configuration to allow us to compile // #ifdef __BORLANDC__ # if __BORLANDC__ <= 0x520 // Borland C++ 5.2 or earlier # ifndef _CXXTEST_OLD_STD # define _CXXTEST_OLD_STD # endif # ifndef _CXXTEST_OLD_TEMPLATE_SYNTAX # define _CXXTEST_OLD_TEMPLATE_SYNTAX # endif # endif # if __BORLANDC__ >= 0x540 // C++ Builder 4.0 or later # ifndef _CXXTEST_NO_COPY_CONST # define _CXXTEST_NO_COPY_CONST # endif # ifndef _CXXTEST_LONGLONG # define _CXXTEST_LONGLONG __int64 # endif # endif #endif // __BORLANDC__ #ifdef _MSC_VER // Visual C++ # ifndef _CXXTEST_LONGLONG # define _CXXTEST_LONGLONG __int64 # endif # if (_MSC_VER >= 0x51E) # ifndef _CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION # define _CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION # endif # endif # pragma warning( disable : 4127 ) # pragma warning( disable : 4290 ) # pragma warning( disable : 4511 ) # pragma warning( disable : 4512 ) # pragma warning( disable : 4514 ) #endif // _MSC_VER #ifdef __GNUC__ # if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ >= 9) # ifndef _CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION # define _CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION # endif # endif # if defined(__LONG_LONG_MAX__) && !defined(__cplusplus) # define _CXXTEST_LONGLONG long long # endif #endif // __GNUC__ #ifdef __DMC__ // Digital Mars # ifndef _CXXTEST_OLD_STD # define _CXXTEST_OLD_STD # endif #endif #ifdef __SUNPRO_CC // Sun Studio C++ # if __SUNPRO_CC >= 0x510 # ifndef _CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION # define _CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION # endif # endif #endif #ifdef __xlC__ // IBM XL C/C++ // Partial specialization may be supported before 7.0.0.3, but it is // definitely supported after. 
# if __xlC__ >= 0x0700 # ifndef _CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION # define _CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION # endif # endif #endif #endif // __cxxtest__Flags_h__ ================================================ FILE: cxxtest/cxxtest/GlobalFixture.cpp ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__GlobalFixture_cpp__ #define __cxxtest__GlobalFixture_cpp__ #include namespace CxxTest { bool GlobalFixture::setUpWorld() { return true; } bool GlobalFixture::tearDownWorld() { return true; } bool GlobalFixture::setUp() { return true; } bool GlobalFixture::tearDown() { return true; } GlobalFixture::GlobalFixture() { attach( _list ); } GlobalFixture::~GlobalFixture() { detach( _list ); } GlobalFixture *GlobalFixture::firstGlobalFixture() { return (GlobalFixture *)_list.head(); } GlobalFixture *GlobalFixture::lastGlobalFixture() { return (GlobalFixture *)_list.tail(); } GlobalFixture *GlobalFixture::nextGlobalFixture() { return (GlobalFixture *)next(); } GlobalFixture *GlobalFixture::prevGlobalFixture() { return (GlobalFixture *)prev(); } } #endif // __cxxtest__GlobalFixture_cpp__ ================================================ FILE: cxxtest/cxxtest/GlobalFixture.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. 
This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__GlobalFixture_h__ #define __cxxtest__GlobalFixture_h__ #include namespace CxxTest { class GlobalFixture : public Link { public: virtual bool setUpWorld(); virtual bool tearDownWorld(); virtual bool setUp(); virtual bool tearDown(); GlobalFixture(); ~GlobalFixture(); static GlobalFixture *firstGlobalFixture(); static GlobalFixture *lastGlobalFixture(); GlobalFixture *nextGlobalFixture(); GlobalFixture *prevGlobalFixture(); private: static List _list; }; } #endif // __cxxtest__GlobalFixture_h__ ================================================ FILE: cxxtest/cxxtest/Gui.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __CXXTEST__GUI_H #define __CXXTEST__GUI_H // // GuiListener is a simple base class for the differes GUIs // GuiTuiRunner combines a GUI with a text-mode error formatter // #include namespace CxxTest { class GuiListener : public TestListener { public: GuiListener() : _state( GREEN_BAR ) {} virtual ~GuiListener() {} virtual void runGui( int &argc, char **argv, TestListener &listener ) { enterGui( argc, argv ); TestRunner::runAllTests( listener ); leaveGui(); } virtual void enterGui( int & /*argc*/, char ** /*argv*/ ) {} virtual void leaveGui() {} // // The easy way is to implement these functions: // virtual void guiEnterWorld( unsigned /*numTotalTests*/ ) {} virtual void guiEnterSuite( const char * /*suiteName*/ ) {} virtual void guiEnterTest( const char * /*suiteName*/, const char * /*testName*/ ) {} virtual void yellowBar() {} virtual void redBar() {} // // The hard way is this: // void enterWorld( const WorldDescription &d ) { guiEnterWorld( d.numTotalTests() ); } void enterSuite( const SuiteDescription &d ) { guiEnterSuite( d.suiteName() ); } void enterTest( const TestDescription &d ) { guiEnterTest( d.suiteName(), d.testName() ); } void leaveTest( const TestDescription & ) {} void leaveSuite( const SuiteDescription & ) {} void leaveWorld( const WorldDescription & ) {} void warning( const char * /*file*/, int /*line*/, const char * /*expression*/ ) { yellowBarSafe(); } void failedTest( const char * /*file*/, int /*line*/, const char * /*expression*/ ) { redBarSafe(); } void failedAssert( const char * /*file*/, int /*line*/, const char * /*expression*/ ) { redBarSafe(); } void failedAssertEquals( const char * /*file*/, int /*line*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*x*/, const char * /*y*/ ) { redBarSafe(); } void failedAssertSameData( const char * /*file*/, int /*line*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*sizeStr*/, const void 
* /*x*/, const void * /*y*/, unsigned /*size*/ ) { redBarSafe(); } void failedAssertDelta( const char * /*file*/, int /*line*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*dStr*/, const char * /*x*/, const char * /*y*/, const char * /*d*/ ) { redBarSafe(); } void failedAssertDiffers( const char * /*file*/, int /*line*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*value*/ ) { redBarSafe(); } void failedAssertLessThan( const char * /*file*/, int /*line*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*x*/, const char * /*y*/ ) { redBarSafe(); } void failedAssertLessThanEquals( const char * /*file*/, int /*line*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*x*/, const char * /*y*/ ) { redBarSafe(); } void failedAssertPredicate( const char * /*file*/, int /*line*/, const char * /*predicate*/, const char * /*xStr*/, const char * /*x*/ ) { redBarSafe(); } void failedAssertRelation( const char * /*file*/, int /*line*/, const char * /*relation*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*x*/, const char * /*y*/ ) { redBarSafe(); } void failedAssertThrows( const char * /*file*/, int /*line*/, const char * /*expression*/, const char * /*type*/, bool /*otherThrown*/ ) { redBarSafe(); } void failedAssertThrowsNot( const char * /*file*/, int /*line*/, const char * /*expression*/ ) { redBarSafe(); } protected: void yellowBarSafe() { if ( _state < YELLOW_BAR ) { yellowBar(); _state = YELLOW_BAR; } } void redBarSafe() { if ( _state < RED_BAR ) { redBar(); _state = RED_BAR; } } private: enum { GREEN_BAR, YELLOW_BAR, RED_BAR } _state; }; template class GuiTuiRunner : public TeeListener { int* _argc; char **_argv; GuiT _gui; TuiT _tui; public: GuiTuiRunner() : _argc(0), _argv(0) {} void process_commandline( int& argc, char** argv ) { _argc=&argc; _argv=argv; setFirst( _gui ); setSecond( _tui ); } int run() { _gui.runGui( *_argc, _argv, *this ); return tracker().failedTests(); } }; } #endif 
//__CXXTEST__GUI_H ================================================ FILE: cxxtest/cxxtest/LinkedList.cpp ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__LinkedList_cpp__ #define __cxxtest__LinkedList_cpp__ #include namespace CxxTest { List GlobalFixture::_list = { 0, 0 }; List RealSuiteDescription::_suites = { 0, 0 }; void List::initialize() { _head = _tail = 0; } Link *List::head() { Link *l = _head; while ( l && !l->active() ) l = l->next(); return l; } const Link *List::head() const { Link *l = _head; while ( l && !l->active() ) l = l->next(); return l; } Link *List::tail() { Link *l = _tail; while ( l && !l->active() ) l = l->prev(); return l; } const Link *List::tail() const { Link *l = _tail; while ( l && !l->active() ) l = l->prev(); return l; } bool List::empty() const { return (_head == 0); } unsigned List::size() const { unsigned count = 0; for ( const Link *l = head(); l != 0; l = l->next() ) ++ count; return count; } Link *List::nth( unsigned n ) { Link *l = head(); while ( n -- ) l = l->next(); return l; } void List::activateAll() { for ( Link *l = _head; l != 0; l = l->justNext() ) l->setActive( true ); } void List::leaveOnly( const Link &link ) { for ( Link *l = head(); l != 0; l = l->next() ) if ( l != &link ) l->setActive( false ); } Link::Link() : _next( 0 ), _prev( 0 ), _active( true ) { } Link::~Link() { } bool Link::active() const { return _active; } void Link::setActive( bool value ) { _active = value; } Link * Link::justNext() { 
return _next; } Link * Link::justPrev() { return _prev; } Link * Link::next() { Link *l = _next; while ( l && !l->_active ) l = l->_next; return l; } Link * Link::prev() { Link *l = _prev; while ( l && !l->_active ) l = l->_prev; return l; } const Link * Link::next() const { Link *l = _next; while ( l && !l->_active ) l = l->_next; return l; } const Link * Link::prev() const { Link *l = _prev; while ( l && !l->_active ) l = l->_prev; return l; } void Link::attach( List &l ) { if ( l._tail ) l._tail->_next = this; _prev = l._tail; _next = 0; if ( l._head == 0 ) l._head = this; l._tail = this; } void Link::detach( List &l ) { if ( _prev ) _prev->_next = _next; else l._head = _next; if ( _next ) _next->_prev = _prev; else l._tail = _prev; } } #endif // __cxxtest__LinkedList_cpp__ ================================================ FILE: cxxtest/cxxtest/LinkedList.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __cxxtest__LinkedList_h__ #define __cxxtest__LinkedList_h__ #include namespace CxxTest { struct List; class Link; struct List { Link *_head; Link *_tail; void initialize(); Link *head(); const Link *head() const; Link *tail(); const Link *tail() const; bool empty() const; unsigned size() const; Link *nth( unsigned n ); void activateAll(); void leaveOnly( const Link &link ); }; class Link { public: Link(); virtual ~Link(); bool active() const; void setActive( bool value = true ); Link *justNext(); Link *justPrev(); Link *next(); Link *prev(); const Link *next() const; const Link *prev() const; void attach( List &l ); void detach( List &l ); private: Link *_next; Link *_prev; bool _active; Link( const Link & ); Link &operator=( const Link & ); }; } #endif // __cxxtest__LinkedList_h__ ================================================ FILE: cxxtest/cxxtest/Mock.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __cxxtest__Mock_h__ #define __cxxtest__Mock_h__ namespace dummy_mock_ns {} // // The default namespace is T:: // #ifndef CXXTEST_MOCK_NAMESPACE # define CXXTEST_MOCK_NAMESPACE T #endif // CXXTEST_MOCK_NAMESPACE // // MockTraits: What to return when no mock object has been created // #define __CXXTEST_MOCK__TRAITS \ namespace CXXTEST_MOCK_NAMESPACE \ { \ template \ class MockTraits \ { \ public: \ static T defaultValue() { return 0; } \ }; \ } // // extern "C" when needed // #ifdef __cplusplus # define CXXTEST_EXTERN_C extern "C" #else # define CXXTEST_EXTERN_C #endif // __cplusplus // // Prototypes: For "normal" headers // #define __CXXTEST_MOCK__PROTOTYPE( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ namespace CXXTEST_MOCK_NAMESPACE { TYPE NAME ARGS; } #define __CXXTEST_MOCK_VOID__PROTOTYPE( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK__PROTOTYPE( MOCK, void, NAME, ARGS, REAL, CALL ) #define __CXXTEST_SUPPLY__PROTOTYPE( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ TYPE REAL ARGS; #define __CXXTEST_SUPPLY_VOID__PROTOTYPE( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY__PROTOTYPE( MOCK, void, NAME, ARGS, REAL, CALL ) // // Class declarations: For test files // #define __CXXTEST_MOCK__CLASS_DECLARATION( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ namespace CXXTEST_MOCK_NAMESPACE { \ class Base_##MOCK : public CxxTest::Link \ { \ public: \ Base_##MOCK(); \ ~Base_##MOCK(); \ bool setUp(); \ bool tearDown(); \ \ static Base_##MOCK ¤t(); \ \ virtual TYPE NAME ARGS = 0; \ \ private: \ static CxxTest::List _list; \ }; \ \ class Real_##MOCK : public Base_##MOCK \ { \ public: \ TYPE NAME ARGS; \ }; \ \ class _Unimplemented_##MOCK : public Base_##MOCK \ { \ public: \ TYPE NAME ARGS; \ }; \ } #define __CXXTEST_MOCK_VOID__CLASS_DECLARATION( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK__CLASS_DECLARATION( MOCK, void, NAME, ARGS, REAL, CALL ) #define __CXXTEST_SUPPLY__CLASS_DECLARATION( MOCK, TYPE, NAME, 
ARGS, REAL, CALL ) \ namespace CXXTEST_MOCK_NAMESPACE { \ class Base_##MOCK : public CxxTest::Link \ { \ public: \ Base_##MOCK(); \ ~Base_##MOCK(); \ bool setUp(); \ bool tearDown(); \ \ static Base_##MOCK ¤t(); \ \ virtual TYPE NAME ARGS = 0; \ \ private: \ static CxxTest::List _list; \ }; \ \ class _Unimplemented_##MOCK : public Base_##MOCK \ { \ public: \ TYPE NAME ARGS; \ }; \ } #define __CXXTEST_SUPPLY_VOID__CLASS_DECLARATION( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY__CLASS_DECLARATION( MOCK, void, NAME, ARGS, REAL, CALL ) // // Class implementation: For test source files // #define __CXXTEST_MOCK__COMMON_CLASS_IMPLEMENTATION( MOCK, NAME ) \ namespace CXXTEST_MOCK_NAMESPACE { \ \ CxxTest::List Base_##MOCK::_list = { 0, 0 }; \ \ Base_##MOCK::Base_##MOCK() { attach( _list ); } \ Base_##MOCK::~Base_##MOCK() { detach( _list ); } \ bool Base_##MOCK::setUp() { return true; } \ bool Base_##MOCK::tearDown() { return true; } \ \ Base_##MOCK &Base_##MOCK::current() \ { \ if ( _list.empty() ) \ static _Unimplemented_##MOCK unimplemented; \ return *(Base_##MOCK *)_list.tail(); \ } \ } #define __CXXTEST_MOCK__CLASS_IMPLEMENTATION( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK__COMMON_CLASS_IMPLEMENTATION( MOCK, NAME ) \ namespace CXXTEST_MOCK_NAMESPACE { \ TYPE Real_##MOCK::NAME ARGS \ { \ return REAL CALL; \ } \ \ TYPE _Unimplemented_##MOCK::NAME ARGS \ { \ while ( false ) \ return NAME CALL; \ __CXXTEST_MOCK_UNIMPLEMENTED( NAME, ARGS ); \ return MockTraits::defaultValue(); \ } \ \ TYPE NAME ARGS \ { \ return Base_##MOCK::current().NAME CALL; \ } \ } #define __CXXTEST_MOCK_VOID__CLASS_IMPLEMENTATION( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK__COMMON_CLASS_IMPLEMENTATION( MOCK, NAME ) \ namespace CXXTEST_MOCK_NAMESPACE { \ void Real_##MOCK::NAME ARGS \ { \ REAL CALL; \ } \ \ void _Unimplemented_##MOCK::NAME ARGS \ { \ while ( false ) \ NAME CALL; \ __CXXTEST_MOCK_UNIMPLEMENTED( NAME, ARGS ); \ } \ \ void NAME ARGS \ { \ Base_##MOCK::current().NAME 
CALL; \ } \ } #define __CXXTEST_SUPPLY__CLASS_IMPLEMENTATION( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK__COMMON_CLASS_IMPLEMENTATION( MOCK, NAME ) \ namespace CXXTEST_MOCK_NAMESPACE { \ TYPE _Unimplemented_##MOCK::NAME ARGS \ { \ while ( false ) \ return NAME CALL; \ __CXXTEST_MOCK_UNIMPLEMENTED( NAME, ARGS ); \ return MockTraits::defaultValue(); \ } \ } \ \ TYPE REAL ARGS \ { \ return CXXTEST_MOCK_NAMESPACE::Base_##MOCK::current().NAME CALL; \ } #define __CXXTEST_SUPPLY_VOID__CLASS_IMPLEMENTATION( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK__COMMON_CLASS_IMPLEMENTATION( MOCK, NAME ) \ namespace CXXTEST_MOCK_NAMESPACE { \ void _Unimplemented_##MOCK::NAME ARGS \ { \ while ( false ) \ NAME CALL; \ __CXXTEST_MOCK_UNIMPLEMENTED( NAME, ARGS ); \ } \ } \ \ void REAL ARGS \ { \ CXXTEST_MOCK_NAMESPACE::Base_##MOCK::current().NAME CALL; \ } \ // // Error for calling mock function w/o object // #define __CXXTEST_MOCK_UNIMPLEMENTED( NAME, ARGS ) \ TS_FAIL( CXXTEST_MOCK_NAMESPACE_STR #NAME #ARGS " called with no " \ CXXTEST_MOCK_NAMESPACE_STR "Base_" #NAME " object" ); \ #define CXXTEST_MOCK_NAMESPACE_STR __CXXTEST_STR(CXXTEST_MOCK_NAMESPACE) "::" #define __CXXTEST_STR(X) __CXXTEST_XSTR(X) #define __CXXTEST_XSTR(X) #X #if defined(CXXTEST_MOCK_TEST_SOURCE_FILE) // // Test source file: Prototypes, class declarations and implementation // #include __CXXTEST_MOCK__TRAITS #define CXXTEST_MOCK( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK__PROTOTYPE( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK__CLASS_DECLARATION( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK__CLASS_IMPLEMENTATION( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ using namespace dummy_mock_ns #define CXXTEST_MOCK_VOID( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK_VOID__PROTOTYPE( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK_VOID__CLASS_DECLARATION( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK_VOID__CLASS_IMPLEMENTATION( MOCK, NAME, ARGS, REAL, CALL ) \ using namespace 
dummy_mock_ns #define CXXTEST_SUPPLY( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY__PROTOTYPE( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY__CLASS_DECLARATION( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY__CLASS_IMPLEMENTATION( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ using namespace dummy_mock_ns #define CXXTEST_SUPPLY_VOID( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY_VOID__PROTOTYPE( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY_VOID__CLASS_DECLARATION( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY_VOID__CLASS_IMPLEMENTATION( MOCK, NAME, ARGS, REAL, CALL ) \ using namespace dummy_mock_ns #elif defined(CXXTEST_FLAGS) || defined(CXXTEST_RUNNING) // // Test file other than source: Prototypes and class declarations // #include __CXXTEST_MOCK__TRAITS; #define CXXTEST_MOCK( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK__PROTOTYPE( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK__CLASS_DECLARATION( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ using namespace dummy_mock_ns #define CXXTEST_MOCK_VOID( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK_VOID__PROTOTYPE( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK_VOID__CLASS_DECLARATION( MOCK, NAME, ARGS, REAL, CALL ) \ using namespace dummy_mock_ns #define CXXTEST_SUPPLY( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY__PROTOTYPE( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY__CLASS_DECLARATION( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ using namespace dummy_mock_ns #define CXXTEST_SUPPLY_VOID( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY_VOID__PROTOTYPE( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY_VOID__CLASS_DECLARATION( MOCK, NAME, ARGS, REAL, CALL ) \ using namespace dummy_mock_ns #elif defined(CXXTEST_MOCK_REAL_SOURCE_FILE) // // Real source file: "Real" implementations // #define CXXTEST_MOCK( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ namespace CXXTEST_MOCK_NAMESPACE { TYPE NAME ARGS { return REAL CALL; } } using namespace dummy_mock_ns 
#define CXXTEST_MOCK_VOID( MOCK, NAME, ARGS, REAL, CALL ) \ namespace CXXTEST_MOCK_NAMESPACE { void NAME ARGS { REAL CALL; } } using namespace dummy_mock_ns #else // // Ordinary header file: Just prototypes // #define CXXTEST_MOCK( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK__PROTOTYPE( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ using namespace dummy_mock_ns #define CXXTEST_MOCK_VOID( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_MOCK_VOID__PROTOTYPE( MOCK, NAME, ARGS, REAL, CALL ) \ using namespace dummy_mock_ns #define CXXTEST_SUPPLY( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY__PROTOTYPE( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ using namespace dummy_mock_ns #define CXXTEST_SUPPLY_VOID( MOCK, NAME, ARGS, REAL, CALL ) \ __CXXTEST_SUPPLY_VOID__PROTOTYPE( MOCK, NAME, ARGS, REAL, CALL ) \ using namespace dummy_mock_ns #endif // Ordinary header file // // How to supply extern "C" functions // #define CXXTEST_SUPPLY_C( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ CXXTEST_EXTERN_C __CXXTEST_SUPPLY__PROTOTYPE( MOCK, TYPE, NAME, ARGS, REAL, CALL ) \ CXXTEST_SUPPLY( MOCK, TYPE, NAME, ARGS, REAL, CALL ) #define CXXTEST_SUPPLY_VOID_C( MOCK, NAME, ARGS, REAL, CALL ) \ CXXTEST_EXTERN_C __CXXTEST_SUPPLY_VOID__PROTOTYPE( MOCK, NAME, ARGS, REAL, CALL ) \ CXXTEST_SUPPLY_VOID( MOCK, NAME, ARGS, REAL, CALL ) // // Usually we mean the global namespace // #define CXXTEST_MOCK_GLOBAL( TYPE, NAME, ARGS, CALL ) \ CXXTEST_MOCK( NAME, TYPE, NAME, ARGS, ::NAME, CALL ) #define CXXTEST_MOCK_VOID_GLOBAL( NAME, ARGS, CALL ) \ CXXTEST_MOCK_VOID( NAME, NAME, ARGS, ::NAME, CALL ) #define CXXTEST_SUPPLY_GLOBAL( TYPE, NAME, ARGS, CALL ) \ CXXTEST_SUPPLY( NAME, TYPE, NAME, ARGS, NAME, CALL ) #define CXXTEST_SUPPLY_VOID_GLOBAL( NAME, ARGS, CALL ) \ CXXTEST_SUPPLY_VOID( NAME, NAME, ARGS, NAME, CALL ) #define CXXTEST_SUPPLY_GLOBAL_C( TYPE, NAME, ARGS, CALL ) \ CXXTEST_SUPPLY_C( NAME, TYPE, NAME, ARGS, NAME, CALL ) #define CXXTEST_SUPPLY_VOID_GLOBAL_C( NAME, ARGS, CALL ) \ CXXTEST_SUPPLY_VOID_C( 
NAME, NAME, ARGS, NAME, CALL ) // // What to return when no mock object has been created. // The default value of 0 usually works, but some cases may need this. // #define CXXTEST_MOCK_DEFAULT_VALUE( TYPE, VALUE ) \ namespace CXXTEST_MOCK_NAMESPACE \ { \ template<> \ class MockTraits \ { \ public: \ static TYPE defaultValue() { return VALUE; } \ }; \ } using namespace dummy_mock_ns #endif // __cxxtest__Mock_h__ ================================================ FILE: cxxtest/cxxtest/ParenPrinter.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__ParenPrinter_h__ #define __cxxtest__ParenPrinter_h__ // // The ParenPrinter is identical to the ErrorPrinter, except it // prints the line number in a format expected by some compilers // (notably, MSVC). // #include namespace CxxTest { class ParenPrinter : public ErrorPrinter { public: ParenPrinter( CXXTEST_STD(ostream) &o = CXXTEST_STD(cout) ) : ErrorPrinter( o, "(", ")" ) {} }; } #endif // __cxxtest__ParenPrinter_h__ ================================================ FILE: cxxtest/cxxtest/QtGui.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__QtGui_h__ #define __cxxtest__QtGui_h__ // // The QtGui displays a simple progress bar using the Qt Toolkit. It // has been tested with versions 2.x and 3.x. // // Apart from normal Qt command-line arguments, it accepts the following options: // -minimized Start minimized, pop up on error // -keep Don't close the window at the end // -title TITLE Set the window caption // // If both are -minimized and -keep specified, GUI will only keep the // window if it's in focus. // #include #include #include #include #include #include #include #include #include namespace CxxTest { class QtGui : public GuiListener { public: void enterGui( int &argc, char **argv ) { parseCommandLine( argc, argv ); createApplication( argc, argv ); } void enterWorld( const WorldDescription &wd ) { createWindow( wd ); processEvents(); } void guiEnterSuite( const char *suiteName ) { showSuiteName( suiteName ); } void guiEnterTest( const char *suiteName, const char *testName ) { setCaption( suiteName, testName ); advanceProgressBar(); showTestName( testName ); showTestsDone( _progressBar->progress() ); processEvents(); } void yellowBar() { setColor( 255, 255, 0 ); setIcon( QMessageBox::Warning ); getTotalTests(); processEvents(); } void redBar() { if ( _startMinimized && _mainWindow->isMinimized() ) showNormal(); setColor( 255, 0, 0 ); setIcon( QMessageBox::Critical ); getTotalTests(); processEvents(); } void leaveGui() { if ( keep() ) { showSummary(); _application->exec(); } else _mainWindow->close( true ); } private: QString _title; bool _startMinimized, _keep; unsigned _numTotalTests; QString _strTotalTests; QApplication *_application; QWidget *_mainWindow; QVBoxLayout *_layout; QProgressBar *_progressBar; QStatusBar *_statusBar; QLabel *_suiteName, *_testName, *_testsDone; void parseCommandLine( int argc, char **argv ) { _startMinimized = _keep = 
false; _title = argv[0]; for ( int i = 1; i < argc; ++ i ) { QString arg( argv[i] ); if ( arg == "-minimized" ) _startMinimized = true; else if ( arg == "-keep" ) _keep = true; else if ( arg == "-title" && (i + 1 < argc) ) _title = argv[++i]; } } void createApplication( int &argc, char **argv ) { _application = new QApplication( argc, argv ); } void createWindow( const WorldDescription &wd ) { getTotalTests( wd ); createMainWindow(); createProgressBar(); createStatusBar(); setMainWidget(); if ( _startMinimized ) showMinimized(); else showNormal(); } void getTotalTests() { getTotalTests( tracker().world() ); } void getTotalTests( const WorldDescription &wd ) { _numTotalTests = wd.numTotalTests(); char s[WorldDescription::MAX_STRLEN_TOTAL_TESTS]; _strTotalTests = wd.strTotalTests( s ); } void createMainWindow() { _mainWindow = new QWidget(); _layout = new QVBoxLayout( _mainWindow ); } void createProgressBar() { _layout->addWidget( _progressBar = new QProgressBar( _numTotalTests, _mainWindow ) ); _progressBar->setProgress( 0 ); setColor( 0, 255, 0 ); setIcon( QMessageBox::Information ); } void createStatusBar() { _layout->addWidget( _statusBar = new QStatusBar( _mainWindow ) ); _statusBar->addWidget( _suiteName = new QLabel( _statusBar ), 2 ); _statusBar->addWidget( _testName = new QLabel( _statusBar ), 4 ); _statusBar->addWidget( _testsDone = new QLabel( _statusBar ), 1 ); } void setMainWidget() { _application->setMainWidget( _mainWindow ); } void showMinimized() { _mainWindow->showMinimized(); } void showNormal() { _mainWindow->showNormal(); centerWindow(); } void setCaption( const QString &suiteName, const QString &testName ) { _mainWindow->setCaption( _title + " - " + suiteName + "::" + testName + "()" ); } void showSuiteName( const QString &suiteName ) { _suiteName->setText( "class " + suiteName ); } void advanceProgressBar() { _progressBar->setProgress( _progressBar->progress() + 1 ); } void showTestName( const QString &testName ) { _testName->setText( testName 
+ "()" ); } void showTestsDone( unsigned testsDone ) { _testsDone->setText( asString( testsDone ) + " of " + _strTotalTests ); } static QString asString( unsigned n ) { return QString::number( n ); } void setColor( int r, int g, int b ) { QPalette palette = _progressBar->palette(); palette.setColor( QColorGroup::Highlight, QColor( r, g, b ) ); _progressBar->setPalette( palette ); } void setIcon( QMessageBox::Icon icon ) { #if QT_VERSION >= 0x030000 _mainWindow->setIcon( QMessageBox::standardIcon( icon ) ); #else // Qt version < 3.0.0 _mainWindow->setIcon( QMessageBox::standardIcon( icon, QApplication::style().guiStyle() ) ); #endif // QT_VERSION } void processEvents() { _application->processEvents(); } void centerWindow() { QWidget *desktop = QApplication::desktop(); int xCenter = desktop->x() + (desktop->width() / 2); int yCenter = desktop->y() + (desktop->height() / 2); int windowWidth = (desktop->width() * 4) / 5; int windowHeight = _mainWindow->height(); _mainWindow->setGeometry( xCenter - (windowWidth / 2), yCenter - (windowHeight / 2), windowWidth, windowHeight ); } bool keep() { if ( !_keep ) return false; if ( !_startMinimized ) return true; return (_mainWindow == _application->activeWindow()); } void showSummary() { QString summary = _strTotalTests + (_numTotalTests == 1 ? " test" : " tests"); if ( tracker().failedTests() ) summary = "Failed " + asString( tracker().failedTests() ) + " of " + summary; else summary = summary + " passed"; _mainWindow->setCaption( _title + " - " + summary ); _statusBar->removeWidget( _suiteName ); _statusBar->removeWidget( _testName ); _testsDone->setText( summary ); } }; } #endif // __cxxtest__QtGui_h__ ================================================ FILE: cxxtest/cxxtest/RealDescriptions.cpp ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. 
This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__RealDescriptions_cpp__ #define __cxxtest__RealDescriptions_cpp__ // // NOTE: If an error occur during world construction/deletion, CxxTest cannot // know where the error originated. // #include namespace CxxTest { RealTestDescription::RealTestDescription() { } RealTestDescription::RealTestDescription( List &argList, SuiteDescription &argSuite, unsigned argLine, const char *argTestName ) { initialize( argList, argSuite, argLine, argTestName ); } void RealTestDescription::initialize( List &argList, SuiteDescription &argSuite, unsigned argLine, const char *argTestName ) { _suite = &argSuite; _line = argLine; _testName = argTestName; attach( argList ); } bool RealTestDescription::setUp() { if ( !suite() ) return false; for ( GlobalFixture *gf = GlobalFixture::firstGlobalFixture(); gf != 0; gf = gf->nextGlobalFixture() ) { bool ok; _TS_TRY { ok = gf->setUp(); } _TS_LAST_CATCH( { ok = false; } ); if ( !ok ) { doFailTest( file(), line(), "Error in GlobalFixture::setUp()" ); return false; } } _TS_TRY { bool ok = false; _TSM_ASSERT_THROWS_NOTHING( file(), line(), "Exception thrown from setUp()", suite()->setUp(); ok=true ); if (ok == false) return ok; } _TS_CATCH_ABORT( { return false; } ); return true; } bool RealTestDescription::tearDown() { if ( !suite() ) return false; _TS_TRY { _TSM_ASSERT_THROWS_NOTHING( file(), line(), "Exception thrown from tearDown()", suite()->tearDown() ); } _TS_CATCH_ABORT( { return false; } ); for ( GlobalFixture *gf = GlobalFixture::lastGlobalFixture(); gf != 0; gf = gf->prevGlobalFixture() ) { bool ok; _TS_TRY { ok = gf->tearDown(); } _TS_LAST_CATCH( { ok = false; } ); if ( !ok ) { 
doFailTest( file(), line(), "Error in GlobalFixture::tearDown()" ); return false; } } return true; } const char *RealTestDescription::file() const { return _suite->file(); } int RealTestDescription::line() const { return _line; } const char *RealTestDescription::testName() const { return _testName; } const char *RealTestDescription::suiteName() const { return _suite->suiteName(); } TestDescription *RealTestDescription::next() { return (RealTestDescription *)Link::next(); } const TestDescription *RealTestDescription::next() const { return (const RealTestDescription *)Link::next(); } TestSuite *RealTestDescription::suite() const { return _suite->suite(); } void RealTestDescription::run() { _TS_TRY { runTest(); } _TS_CATCH_ABORT( {} ) ___TSM_CATCH( file(), line(), "Exception thrown from test" ); } RealSuiteDescription::RealSuiteDescription() {} RealSuiteDescription::RealSuiteDescription( const char *argFile, unsigned argLine, const char *argSuiteName, List &argTests ) { initialize( argFile, argLine, argSuiteName, argTests ); } void RealSuiteDescription::initialize( const char *argFile, unsigned argLine, const char *argSuiteName, List &argTests ) { _file = argFile; _line = argLine; _suiteName = argSuiteName; _tests = &argTests; attach( _suites ); } const char *RealSuiteDescription::file() const { return _file; } int RealSuiteDescription::line() const { return _line; } const char *RealSuiteDescription::suiteName() const { return _suiteName; } TestDescription *RealSuiteDescription::firstTest() { return (RealTestDescription *)_tests->head(); } const TestDescription *RealSuiteDescription::firstTest() const { return (const RealTestDescription *)_tests->head(); } SuiteDescription *RealSuiteDescription::next() { return (RealSuiteDescription *)Link::next(); } const SuiteDescription *RealSuiteDescription::next() const { return (const RealSuiteDescription *)Link::next(); } unsigned RealSuiteDescription::numTests() const { return _tests->size(); } const TestDescription 
&RealSuiteDescription::testDescription( unsigned i ) const { return *(RealTestDescription *)_tests->nth( i ); } void RealSuiteDescription::activateAllTests() { _tests->activateAll(); } bool RealSuiteDescription::leaveOnly( const char *testName ) { for ( TestDescription *td = firstTest(); td != 0; td = td->next() ) { if ( stringsEqual( td->testName(), testName ) ) { _tests->leaveOnly( *td ); return true; } } return false; } StaticSuiteDescription::StaticSuiteDescription() {} StaticSuiteDescription::StaticSuiteDescription( const char *argFile, unsigned argLine, const char *argSuiteName, TestSuite &argSuite, List &argTests ) : RealSuiteDescription( argFile, argLine, argSuiteName, argTests ) { doInitialize( argSuite ); } void StaticSuiteDescription::initialize( const char *argFile, unsigned argLine, const char *argSuiteName, TestSuite &argSuite, List &argTests ) { RealSuiteDescription::initialize( argFile, argLine, argSuiteName, argTests ); doInitialize( argSuite ); } void StaticSuiteDescription::doInitialize( TestSuite &argSuite ) { _suite = &argSuite; } TestSuite *StaticSuiteDescription::suite() const { return _suite; } bool StaticSuiteDescription::setUp() { return true; } bool StaticSuiteDescription::tearDown() { return true; } CommonDynamicSuiteDescription::CommonDynamicSuiteDescription() {} CommonDynamicSuiteDescription::CommonDynamicSuiteDescription( const char *argFile, unsigned argLine, const char *argSuiteName, List &argTests, unsigned argCreateLine, unsigned argDestroyLine ) : RealSuiteDescription( argFile, argLine, argSuiteName, argTests ) { doInitialize( argCreateLine, argDestroyLine ); } void CommonDynamicSuiteDescription::initialize( const char *argFile, unsigned argLine, const char *argSuiteName, List &argTests, unsigned argCreateLine, unsigned argDestroyLine ) { RealSuiteDescription::initialize( argFile, argLine, argSuiteName, argTests ); doInitialize( argCreateLine, argDestroyLine ); } void CommonDynamicSuiteDescription::doInitialize( unsigned 
argCreateLine, unsigned argDestroyLine ) { _createLine = argCreateLine; _destroyLine = argDestroyLine; } List &RealWorldDescription::suites() { return RealSuiteDescription::_suites; } unsigned RealWorldDescription::numSuites( void ) const { return suites().size(); } unsigned RealWorldDescription::numTotalTests( void ) const { unsigned count = 0; for ( const SuiteDescription *sd = firstSuite(); sd != 0; sd = sd->next() ) count += sd->numTests(); return count; } SuiteDescription *RealWorldDescription::firstSuite() { return (RealSuiteDescription *)suites().head(); } const SuiteDescription *RealWorldDescription::firstSuite() const { return (const RealSuiteDescription *)suites().head(); } const SuiteDescription &RealWorldDescription::suiteDescription( unsigned i ) const { return *(const RealSuiteDescription *)suites().nth( i ); } void RealWorldDescription::activateAllTests() { suites().activateAll(); for ( SuiteDescription *sd = firstSuite(); sd != 0; sd = sd->next() ) sd->activateAllTests(); } bool RealWorldDescription::leaveOnly( const char *suiteName, const char *testName ) { for ( SuiteDescription *sd = firstSuite(); sd != 0; sd = sd->next() ) { if ( stringsEqual( sd->suiteName(), suiteName ) ) { if ( testName ) if ( !sd->leaveOnly( testName ) ) return false; suites().leaveOnly( *sd ); return true; } } return false; } bool RealWorldDescription::setUp() { for ( GlobalFixture *gf = GlobalFixture::firstGlobalFixture(); gf != 0; gf = gf->nextGlobalFixture() ) { bool ok; _TS_TRY { ok = gf->setUpWorld(); if (tracker().testFailed()) { tracker().initialize(); ok = false; } } _TS_LAST_CATCH( { ok = false; } ); if ( !ok ) { reportError( "Error setting up world" ); return false; } } return true; } bool RealWorldDescription::tearDown() { for ( GlobalFixture *gf = GlobalFixture::lastGlobalFixture(); gf != 0; gf = gf->prevGlobalFixture() ) { bool ok; _TS_TRY { ok = gf->tearDownWorld(); } _TS_LAST_CATCH( { ok = false; } ); if ( !ok ) { reportError( "Error tearing down world" ); 
return false; } } return true; } void RealWorldDescription::reportError( const char *message ) { doWarn( __FILE__, 5, message ); } void activateAllTests() { RealWorldDescription().activateAllTests(); } bool leaveOnly( const char *suiteName, const char *testName ) { return RealWorldDescription().leaveOnly( suiteName, testName ); } } #endif // __cxxtest__RealDescriptions_cpp__ ================================================ FILE: cxxtest/cxxtest/RealDescriptions.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__RealDescriptions_h__ #define __cxxtest__RealDescriptions_h__ // // The "real" description classes // #include #include #include namespace CxxTest { class RealTestDescription : public TestDescription { public: RealTestDescription(); RealTestDescription( List &argList, SuiteDescription &argSuite, unsigned argLine, const char *argTestName ); void initialize( List &argList, SuiteDescription &argSuite, unsigned argLine, const char *argTestName ); const char *file() const; int line() const; const char *testName() const; const char *suiteName() const; TestDescription *next(); const TestDescription *next() const; TestSuite *suite() const; bool setUp(); void run(); bool tearDown(); private: RealTestDescription( const RealTestDescription & ); RealTestDescription &operator=( const RealTestDescription & ); virtual void runTest() = 0; SuiteDescription *_suite; int _line; const char *_testName; }; class RealSuiteDescription : public SuiteDescription { public: 
RealSuiteDescription(); RealSuiteDescription( const char *argFile, unsigned argLine, const char *argSuiteName, List &argTests ); void initialize( const char *argFile, unsigned argLine, const char *argSuiteName, List &argTests ); const char *file() const; int line() const; const char *suiteName() const; TestDescription *firstTest(); const TestDescription *firstTest() const; SuiteDescription *next(); const SuiteDescription *next() const; unsigned numTests() const; const TestDescription &testDescription( unsigned i ) const; void activateAllTests(); bool leaveOnly( const char *testName ); private: RealSuiteDescription( const RealSuiteDescription & ); RealSuiteDescription &operator=( const RealSuiteDescription & ); const char *_file; int _line; const char *_suiteName; List *_tests; static List _suites; friend class RealWorldDescription; }; class StaticSuiteDescription : public RealSuiteDescription { public: StaticSuiteDescription(); StaticSuiteDescription( const char *argFile, unsigned argLine, const char *argSuiteName, TestSuite &argSuite, List &argTests ); void initialize( const char *argFile, unsigned argLine, const char *argSuiteName, TestSuite &argSuite, List &argTests ); TestSuite *suite() const; bool setUp(); bool tearDown(); private: StaticSuiteDescription( const StaticSuiteDescription & ); StaticSuiteDescription &operator=( const StaticSuiteDescription & ); void doInitialize( TestSuite &argSuite ); TestSuite *_suite; }; class CommonDynamicSuiteDescription : public RealSuiteDescription { public: CommonDynamicSuiteDescription(); CommonDynamicSuiteDescription( const char *argFile, unsigned argLine, const char *argSuiteName, List &argTests, unsigned argCreateLine, unsigned argDestroyLine ); void initialize( const char *argFile, unsigned argLine, const char *argSuiteName, List &argTests, unsigned argCreateLine, unsigned argDestroyLine ); protected: unsigned _createLine, _destroyLine; private: void doInitialize( unsigned argCreateLine, unsigned argDestroyLine ); }; 
template class DynamicSuiteDescription : public CommonDynamicSuiteDescription { public: DynamicSuiteDescription() {} DynamicSuiteDescription( const char *argFile, unsigned argLine, const char *argSuiteName, List &argTests, S *&argSuite, unsigned argCreateLine, unsigned argDestroyLine ) : CommonDynamicSuiteDescription( argFile, argLine, argSuiteName, argTests, argCreateLine, argDestroyLine ) { _suite = &argSuite; } void initialize( const char *argFile, unsigned argLine, const char *argSuiteName, List &argTests, S *&argSuite, unsigned argCreateLine, unsigned argDestroyLine ) { CommonDynamicSuiteDescription::initialize( argFile, argLine, argSuiteName, argTests, argCreateLine, argDestroyLine ); _suite = &argSuite; } TestSuite *suite() const { return realSuite(); } bool setUp(); bool tearDown(); private: S *realSuite() const { return *_suite; } void setSuite( S *s ) { *_suite = s; } void createSuite() { setSuite( S::createSuite() ); } void destroySuite() { S *s = realSuite(); setSuite( 0 ); S::destroySuite( s ); } S **_suite; }; template bool DynamicSuiteDescription::setUp() { _TS_TRY { _TSM_ASSERT_THROWS_NOTHING( file(), _createLine, "Exception thrown from createSuite()", createSuite() ); _TSM_ASSERT( file(), _createLine, "createSuite() failed", suite() != 0 ); } _TS_CATCH_ABORT( { return false; } ); return (suite() != 0); } template bool DynamicSuiteDescription::tearDown() { if ( !_suite ) return true; _TS_TRY { _TSM_ASSERT_THROWS_NOTHING( file(), _destroyLine, "destroySuite() failed", destroySuite() ); } _TS_CATCH_ABORT( { return false; } ); return true; } class RealWorldDescription : public WorldDescription { public: static List &suites(); const char *worldName() const { return _worldName;} unsigned numSuites( void ) const; unsigned numTotalTests( void ) const; SuiteDescription *firstSuite(); const SuiteDescription *firstSuite() const; const SuiteDescription &suiteDescription( unsigned i ) const; void activateAllTests(); bool leaveOnly( const char *suiteName, const 
char *testName = 0 ); bool setUp(); bool tearDown(); static void reportError( const char *message ); static const char *_worldName; }; void activateAllTests(); bool leaveOnly( const char *suiteName, const char *testName = 0 ); } #endif // __cxxtest__RealDescriptions_h__ ================================================ FILE: cxxtest/cxxtest/Root.cpp ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__Root_cpp__ #define __cxxtest__Root_cpp__ // // This file holds the "root" of CxxTest, i.e. // the parts that must be in a source file file. // #include #include #include #include #include #include #include #include #endif // __cxxtest__Root_cpp__ ================================================ FILE: cxxtest/cxxtest/SelfTest.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __cxxtest_SelfTest_h__ #define __cxxtest_SelfTest_h__ #define CXXTEST_SUITE(name) #define CXXTEST_CODE(member) #endif // __cxxtest_SelfTest_h__ ================================================ FILE: cxxtest/cxxtest/StdHeaders.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest_StdHeaders_h__ #define __cxxtest_StdHeaders_h__ // // This file basically #includes the STL headers. // It exists to support warning level 4 in Visual C++ // #ifdef _MSC_VER # pragma warning( push, 1 ) #endif // _MSC_VER #include #include #include #include #include #include #include #ifdef _MSC_VER # pragma warning( pop ) #endif // _MSC_VER #endif // __cxxtest_StdHeaders_h__ ================================================ FILE: cxxtest/cxxtest/StdTestSuite.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __cxxtest__StdTestSuite_h__ #define __cxxtest__StdTestSuite_h__ // // This provides explicit partial specializations for STL-based // TestSuite comparison functions // namespace CxxTest { #ifdef _CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION template struct delta, std::vector, D> { static bool test(std::vector x, std::vector y, D d) { if ( x.size() != y.size() ) return false; for(size_t i = 0; i::test(x[i], y[i], d) ) return false; return true; } }; template struct delta, std::list, D> { static bool test(std::list x, std::list y, D d) { typename std::list::const_iterator x_it = x.begin(); typename std::list::const_iterator y_it = y.begin(); for(; x_it != x.end(); ++x_it, ++y_it) { if ( y_it == y.end() ) return false; if ( ! delta::test(*x_it, *y_it, d) ) return false; } return y_it == y.end(); } }; #endif } // namespace CxxTest #endif // __cxxtest__StdTestSuite_h__ ================================================ FILE: cxxtest/cxxtest/StdValueTraits.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest_StdValueTraits_h__ #define __cxxtest_StdValueTraits_h__ // // This file defines ValueTraits for std:: stuff. 
// It is #included by if you // define CXXTEST_HAVE_STD // #include #include #ifdef _CXXTEST_OLD_STD # define CXXTEST_STD(x) x #else // !_CXXTEST_OLD_STD # define CXXTEST_STD(x) std::x #endif // _CXXTEST_OLD_STD #ifndef CXXTEST_USER_VALUE_TRAITS namespace CxxTest { // // NOTE: This should have been // template // class ValueTraits< std::basic_string > {}; // But MSVC doesn't support it (yet). // // // If we have std::string, we might as well use it // class StdTraitsBase { public: StdTraitsBase &operator<<( const CXXTEST_STD(string) &s ) { _s += s; return *this; } const char *asString() const { return _s.c_str(); } private: CXXTEST_STD(string) _s; }; // // std::string // CXXTEST_TEMPLATE_INSTANTIATION class ValueTraits : public StdTraitsBase { public: ValueTraits( const CXXTEST_STD(string) &s ) { *this << "\""; for ( unsigned i = 0; i < s.length(); ++ i ) { char c[sizeof("\\xXX")]; charToString( s[i], c ); *this << c; } *this << "\""; } }; CXXTEST_COPY_CONST_TRAITS( CXXTEST_STD(string) ); #ifndef _CXXTEST_OLD_STD // // std::wstring // CXXTEST_TEMPLATE_INSTANTIATION class ValueTraits)> : public StdTraitsBase { public: ValueTraits( const CXXTEST_STD(basic_string) &s ) { *this << "L\""; for ( unsigned i = 0; i < s.length(); ++ i ) { char c[sizeof("\\x12345678")]; charToString( (unsigned long)s[i], c ); *this << c; } *this << "\""; } }; CXXTEST_COPY_CONST_TRAITS( CXXTEST_STD(basic_string) ); #endif // _CXXTEST_OLD_STD // // Convert a range defined by iterators to a string // This is useful for almost all STL containers // template void dumpRange( Stream &s, Iterator first, Iterator last ) { if ( first == last ) { s << "{}"; return; } s << "{ "; while ( first != last ) { s << TS_AS_STRING(*first); if ( ++ first != last ) s << ", "; } s << " }"; } #ifdef _CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION // // std::pair // template class ValueTraits< CXXTEST_STD(pair) > : public StdTraitsBase { public: ValueTraits( const CXXTEST_STD(pair) &p ) { *this << "<" << TS_AS_STRING( 
p.first ) << ", " << TS_AS_STRING( p.second ) << ">"; } }; // // std::vector // template class ValueTraits< CXXTEST_STD(vector) > : public StdTraitsBase { public: ValueTraits( const CXXTEST_STD(vector) &v ) { dumpRange( *this, v.begin(), v.end() ); } }; // // std::list // template class ValueTraits< CXXTEST_STD(list) > : public StdTraitsBase { public: ValueTraits( const CXXTEST_STD(list) &l ) { dumpRange( *this, l.begin(), l.end() ); } }; // // std::set // template class ValueTraits< CXXTEST_STD(set) > : public StdTraitsBase { public: ValueTraits( const CXXTEST_STD(set) &s ) { dumpRange( *this, s.begin(), s.end() ); } }; // // std::map // template class ValueTraits< CXXTEST_STD(map) > : public StdTraitsBase { public: ValueTraits( const CXXTEST_STD(map) &m ) { dumpRange( *this, m.begin(), m.end() ); } }; // // std::deque // template class ValueTraits< CXXTEST_STD(deque) > : public StdTraitsBase { public: ValueTraits( const CXXTEST_STD(deque) &d ) { dumpRange( *this, d.begin(), d.end() ); } }; // // std::multiset // template class ValueTraits< CXXTEST_STD(multiset) > : public StdTraitsBase { public: ValueTraits( const CXXTEST_STD(multiset) &ms ) { dumpRange( *this, ms.begin(), ms.end() ); } }; // // std::multimap // template class ValueTraits< CXXTEST_STD(multimap) > : public StdTraitsBase { public: ValueTraits( const CXXTEST_STD(multimap) &mm ) { dumpRange( *this, mm.begin(), mm.end() ); } }; // // std::complex // template class ValueTraits< CXXTEST_STD(complex) > : public StdTraitsBase { public: ValueTraits( const CXXTEST_STD(complex) &c ) { if ( !c.imag() ) *this << TS_AS_STRING(c.real()); else if ( !c.real() ) *this << "(" << TS_AS_STRING(c.imag()) << " * i)"; else *this << "(" << TS_AS_STRING(c.real()) << " + " << TS_AS_STRING(c.imag()) << " * i)"; } }; #endif // _CXXTEST_PARTIAL_TEMPLATE_SPECIALIZATION } #endif // CXXTEST_USER_VALUE_TRAITS #endif // __cxxtest_StdValueTraits_h__ ================================================ FILE: 
cxxtest/cxxtest/StdioFilePrinter.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__StdioFilePrinter_h__ #define __cxxtest__StdioFilePrinter_h__ // // The StdioFilePrinter is a simple TestListener that // just prints "OK" if everything goes well, otherwise // reports the error in the format of compiler messages. // This class uses , i.e. FILE * and fprintf(). // #include #include namespace CxxTest { class StdioFilePrinter : public ErrorFormatter { public: StdioFilePrinter( FILE *o, const char *preLine = ":", const char *postLine = "" ) : ErrorFormatter( new Adapter(o), preLine, postLine ) {} virtual ~StdioFilePrinter() { delete outputStream(); } private: class Adapter : public OutputStream { Adapter( const Adapter & ); Adapter &operator=( const Adapter & ); FILE *_o; public: Adapter( FILE *o ) : _o(o) {} void flush() { fflush( _o ); } OutputStream &operator<<( unsigned i ) { fprintf( _o, "%u", i ); return *this; } OutputStream &operator<<( const char *s ) { fputs( s, _o ); return *this; } OutputStream &operator<<( Manipulator m ) { return OutputStream::operator<<( m ); } }; }; } #endif // __cxxtest__StdioFilePrinter_h__ ================================================ FILE: cxxtest/cxxtest/StdioPrinter.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. 
This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__StdioPrinter_h__ #define __cxxtest__StdioPrinter_h__ // // The StdioPrinter is an StdioFilePrinter which defaults to stdout. // This should have been called StdOutPrinter or something, but the name // has been historically used. // #include namespace CxxTest { class StdioPrinter : public StdioFilePrinter { public: StdioPrinter( FILE *o = stdout, const char *preLine = ":", const char *postLine = "" ) : StdioFilePrinter( o, preLine, postLine ) {} }; } #endif // __cxxtest__StdioPrinter_h__ ================================================ FILE: cxxtest/cxxtest/TeeListener.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __cxxtest__TeeListener_h__ #define __cxxtest__TeeListener_h__ // // A TeeListener notifies two "regular" TestListeners // #include #include namespace CxxTest { class TeeListener : public TestListener { public: TeeListener() { setFirst( _dummy ); setSecond( _dummy ); } virtual ~TeeListener() { } void setFirst( TestListener &first ) { _first = &first; } void setSecond( TestListener &second ) { _second = &second; } void enterWorld( const WorldDescription &d ) { _first->enterWorld( d ); _second->enterWorld( d ); } void enterSuite( const SuiteDescription &d ) { _first->enterSuite( d ); _second->enterSuite( d ); } void enterTest( const TestDescription &d ) { _first->enterTest( d ); _second->enterTest( d ); } void trace( const char *file, int line, const char *expression ) { _first->trace( file, line, expression ); _second->trace( file, line, expression ); } void warning( const char *file, int line, const char *expression ) { _first->warning( file, line, expression ); _second->warning( file, line, expression ); } void failedTest( const char *file, int line, const char *expression ) { _first->failedTest( file, line, expression ); _second->failedTest( file, line, expression ); } void failedAssert( const char *file, int line, const char *expression ) { _first->failedAssert( file, line, expression ); _second->failedAssert( file, line, expression ); } void failedAssertEquals( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ) { _first->failedAssertEquals( file, line, xStr, yStr, x, y ); _second->failedAssertEquals( file, line, xStr, yStr, x, y ); } void failedAssertSameData( const char *file, int line, const char *xStr, const char *yStr, const char *sizeStr, const void *x, const void *y, unsigned size ) { _first->failedAssertSameData( file, line, xStr, yStr, sizeStr, x, y, size ); _second->failedAssertSameData( file, line, xStr, yStr, sizeStr, x, y, 
size ); } void failedAssertSameFiles( const char* file, int line, const char* file1, const char* file2, const char* explanation) { _first->failedAssertSameFiles( file, line, file1, file2, explanation ); _second->failedAssertSameFiles( file, line, file1, file2, explanation ); } void failedAssertDelta( const char *file, int line, const char *xStr, const char *yStr, const char *dStr, const char *x, const char *y, const char *d ) { _first->failedAssertDelta( file, line, xStr, yStr, dStr, x, y, d ); _second->failedAssertDelta( file, line, xStr, yStr, dStr, x, y, d ); } void failedAssertDiffers( const char *file, int line, const char *xStr, const char *yStr, const char *value ) { _first->failedAssertDiffers( file, line, xStr, yStr, value ); _second->failedAssertDiffers( file, line, xStr, yStr, value ); } void failedAssertLessThan( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ) { _first->failedAssertLessThan( file, line, xStr, yStr, x, y ); _second->failedAssertLessThan( file, line, xStr, yStr, x, y ); } void failedAssertLessThanEquals( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ) { _first->failedAssertLessThanEquals( file, line, xStr, yStr, x, y ); _second->failedAssertLessThanEquals( file, line, xStr, yStr, x, y ); } void failedAssertPredicate( const char *file, int line, const char *predicate, const char *xStr, const char *x ) { _first->failedAssertPredicate( file, line, predicate, xStr, x ); _second->failedAssertPredicate( file, line, predicate, xStr, x ); } void failedAssertRelation( const char *file, int line, const char *relation, const char *xStr, const char *yStr, const char *x, const char *y ) { _first->failedAssertRelation( file, line, relation, xStr, yStr, x, y ); _second->failedAssertRelation( file, line, relation, xStr, yStr, x, y ); } void failedAssertThrows( const char *file, int line, const char *expression, const char *type, bool otherThrown ) { 
_first->failedAssertThrows( file, line, expression, type, otherThrown ); _second->failedAssertThrows( file, line, expression, type, otherThrown ); } void failedAssertThrowsNot( const char *file, int line, const char *expression ) { _first->failedAssertThrowsNot( file, line, expression ); _second->failedAssertThrowsNot( file, line, expression ); } void leaveTest( const TestDescription &d ) { _first->leaveTest(d); _second->leaveTest(d); } void leaveSuite( const SuiteDescription &d ) { _first->leaveSuite(d); _second->leaveSuite(d); } void leaveWorld( const WorldDescription &d ) { _first->leaveWorld(d); _second->leaveWorld(d); } private: TestListener *_first, *_second; TestListener _dummy; }; } #endif // __cxxtest__TeeListener_h__ ================================================ FILE: cxxtest/cxxtest/TestListener.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__TestListener_h__ #define __cxxtest__TestListener_h__ // // TestListener is the base class for all "listeners", // i.e. classes that receive notifications of the // testing process. // // The names of the parameters are in comments to avoid // "unused parameter" warnings. 
// #include namespace CxxTest { class TestListener { public: TestListener() {} virtual ~TestListener() {} virtual void process_commandline(int& /*argc*/, char** /*argv*/) {} virtual void enterWorld( const WorldDescription & /*desc*/ ) {} virtual void enterSuite( const SuiteDescription & /*desc*/ ) {} virtual void enterTest( const TestDescription & /*desc*/ ) {} virtual void trace( const char * /*file*/, int /*line*/, const char * /*expression*/ ) {} virtual void warning( const char * /*file*/, int /*line*/, const char * /*expression*/ ) {} virtual void failedTest( const char * /*file*/, int /*line*/, const char * /*expression*/ ) {} virtual void failedAssert( const char * /*file*/, int /*line*/, const char * /*expression*/ ) {} virtual void failedAssertEquals( const char * /*file*/, int /*line*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*x*/, const char * /*y*/ ) {} virtual void failedAssertSameData( const char * /*file*/, int /*line*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*sizeStr*/, const void * /*x*/, const void * /*y*/, unsigned /*size*/ ) {} virtual void failedAssertDelta( const char * /*file*/, int /*line*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*dStr*/, const char * /*x*/, const char * /*y*/, const char * /*d*/ ) {} virtual void failedAssertDiffers( const char * /*file*/, int /*line*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*value*/ ) {} virtual void failedAssertLessThan( const char * /*file*/, int /*line*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*x*/, const char * /*y*/ ) {} virtual void failedAssertLessThanEquals( const char * /*file*/, int /*line*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*x*/, const char * /*y*/ ) {} virtual void failedAssertPredicate( const char * /*file*/, int /*line*/, const char * /*predicate*/, const char * /*xStr*/, const char * /*x*/ ) {} virtual void failedAssertRelation( const char * /*file*/, int 
/*line*/, const char * /*relation*/, const char * /*xStr*/, const char * /*yStr*/, const char * /*x*/, const char * /*y*/ ) {} virtual void failedAssertThrows( const char * /*file*/, int /*line*/, const char * /*expression*/, const char * /*type*/, bool /*otherThrown*/ ) {} virtual void failedAssertThrowsNot( const char * /*file*/, int /*line*/, const char * /*expression*/ ) {} virtual void failedAssertSameFiles( const char* /*file*/, int /*line*/, const char* , const char*, const char* ) {} virtual void leaveTest( const TestDescription & /*desc*/ ) {} virtual void leaveSuite( const SuiteDescription & /*desc*/ ) {} virtual void leaveWorld( const WorldDescription & /*desc*/ ) {} }; } #endif // __cxxtest__TestListener_h__ ================================================ FILE: cxxtest/cxxtest/TestMain.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __CxxTestMain_h #define __CxxTestMain_h #include #include #ifndef _CXXTEST_HAVE_STD # define _CXXTEST_HAVE_STD #endif // _CXXTEST_HAVE_STD #include #ifdef _CXXTEST_OLD_STD # include # include #else // !_CXXTEST_OLD_STD # include # include #endif // _CXXTEST_OLD_STD namespace CxxTest { inline void print_help(const char* name) { CXXTEST_STD(cerr) << name << " " << CXXTEST_STD(endl); CXXTEST_STD(cerr) << name << " " << CXXTEST_STD(endl); CXXTEST_STD(cerr) << name << " -h" << CXXTEST_STD(endl); CXXTEST_STD(cerr) << name << " --help" << CXXTEST_STD(endl); CXXTEST_STD(cerr) << name << " --help-tests" << CXXTEST_STD(endl); CXXTEST_STD(cerr) << name << " -v Enable tracing output." << CXXTEST_STD(endl); } template int Main(TesterT& tmp, int argc, char* argv[]) { // // Parse the command-line arguments. The default behavior is to run all tests // // This is a primitive parser, but I'm not sure what sort of portable // parser should be used in cxxtest. // // // Print command-line syntax // for (int i=1; inext() ) for ( TestDescription *td = sd->firstTest(); td; td = td->next() ) CXXTEST_STD(cout) << td->suiteName() << " " << td->testName() << CXXTEST_STD(endl); return 0; } } // // Process command-line options here. 
// while ((argc > 1) && (argv[1][0] == '-')) { if (CXXTEST_STD(strcmp)(argv[1],"-v") == 0) { tracker().print_tracing = true; } else { CXXTEST_STD(cerr) << "ERROR: unknown option '" << argv[1] << "'" << CXXTEST_STD(endl); return -1; } for (int i=1; i<(argc-1); i++) argv[i] = argv[i+1]; argc--; } // // Run experiments // bool status=false; if ((argc==2) && (argv[1][0] != '-')) { status=leaveOnly(argv[1]); if (!status) { CXXTEST_STD(cerr) << "ERROR: unknown suite '" << argv[1] << "'" << CXXTEST_STD(endl); return -1; } } if ((argc==3) && (argv[1][0] != '-')) { status=leaveOnly(argv[1],argv[2]); if (!status) { CXXTEST_STD(cerr) << "ERROR: unknown test '" << argv[1] << "::" << argv[2] << "'" << CXXTEST_STD(endl); return -1; } } tmp.process_commandline(argc,argv); return tmp.run(); } } #endif ================================================ FILE: cxxtest/cxxtest/TestRunner.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest_TestRunner_h__ #define __cxxtest_TestRunner_h__ // // TestRunner is the class that runs all the tests. 
// To use it, create an object that implements the TestListener // interface and call TestRunner::runAllTests( myListener ); // #include #include #include #include namespace CxxTest { class TestRunner { public: static void runAllTests( TestListener &listener ) { tracker().setListener( &listener ); _TS_TRY { TestRunner().runWorld(); } _TS_LAST_CATCH( { tracker().failedTest( __FILE__, __LINE__, "Exception thrown from world" ); } ); tracker().setListener( 0 ); } static void runAllTests( TestListener *listener ) { if ( listener ) { listener->warning( __FILE__, __LINE__, "Deprecated; Use runAllTests( TestListener & )" ); runAllTests( *listener ); } } private: void runWorld() { RealWorldDescription wd; WorldGuard sg; tracker().enterWorld( wd ); if ( wd.setUp() ) { for ( SuiteDescription *sd = wd.firstSuite(); sd; sd = sd->next() ) if ( sd->active() ) runSuite( *sd ); wd.tearDown(); } tracker().leaveWorld( wd ); } void runSuite( SuiteDescription &sd ) { StateGuard sg; tracker().enterSuite( sd ); if ( sd.setUp() ) { for ( TestDescription *td = sd.firstTest(); td; td = td->next() ) if ( td->active() ) runTest( *td ); sd.tearDown(); } tracker().leaveSuite( sd ); } void runTest( TestDescription &td ) { StateGuard sg; tracker().enterTest( td ); if ( td.setUp() ) { td.run(); td.tearDown(); } tracker().leaveTest( td ); } class StateGuard { #ifdef _CXXTEST_HAVE_EH bool _abortTestOnFail; #endif // _CXXTEST_HAVE_EH unsigned _maxDumpSize; public: StateGuard() { #ifdef _CXXTEST_HAVE_EH _abortTestOnFail = abortTestOnFail(); #endif // _CXXTEST_HAVE_EH _maxDumpSize = maxDumpSize(); } ~StateGuard() { #ifdef _CXXTEST_HAVE_EH setAbortTestOnFail( _abortTestOnFail ); #endif // _CXXTEST_HAVE_EH setMaxDumpSize( _maxDumpSize ); } }; class WorldGuard : public StateGuard { public: WorldGuard() : StateGuard() { #ifdef _CXXTEST_HAVE_EH setAbortTestOnFail( CXXTEST_DEFAULT_ABORT ); #endif // _CXXTEST_HAVE_EH setMaxDumpSize( CXXTEST_MAX_DUMP_SIZE ); } }; }; // // For --no-static-init // void 
initialize(); } #endif // __cxxtest_TestRunner_h__ ================================================ FILE: cxxtest/cxxtest/TestSuite.cpp ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__TestSuite_cpp__ #define __cxxtest__TestSuite_cpp__ #include #if defined(_CXXTEST_HAVE_STD) #include #endif namespace CxxTest { // // TestSuite members // TestSuite::~TestSuite() {} void TestSuite::setUp() {} void TestSuite::tearDown() {} // // Test-aborting stuff // static bool currentAbortTestOnFail = false; bool abortTestOnFail() { return currentAbortTestOnFail; } void setAbortTestOnFail( bool value ) { currentAbortTestOnFail = value; } void doAbortTest() { # if defined(_CXXTEST_HAVE_EH) if ( currentAbortTestOnFail ) throw AbortTest(); # endif // _CXXTEST_HAVE_EH } // // Max dump size // static unsigned currentMaxDumpSize = CXXTEST_MAX_DUMP_SIZE; unsigned maxDumpSize() { return currentMaxDumpSize; } void setMaxDumpSize( unsigned value ) { currentMaxDumpSize = value; } // // Some non-template functions // void doTrace( const char *file, int line, const char *message ) { if (tracker().print_tracing) { tracker().trace( file, line, message ); } } void doWarn( const char *file, int line, const char *message ) { tracker().warning( file, line, message ); } void doFailTest( const char *file, int line, const char *message ) { tracker().failedTest( file, line, message ); TS_ABORT(); } void doFailAssert( const char *file, int line, const char *expression, const char *message ) { 
if ( message ) tracker().failedTest( file, line, message ); tracker().failedAssert( file, line, expression ); TS_ABORT(); } bool sameData( const void *x, const void *y, unsigned size ) { if ( size == 0 ) return true; if ( x == y ) return true; if ( !x || !y ) return false; const char *cx = (const char *)x; const char *cy = (const char *)y; while ( size -- ) if ( *cx++ != *cy++ ) return false; return true; } void doAssertSameData( const char *file, int line, const char *xExpr, const void *x, const char *yExpr, const void *y, const char *sizeExpr, unsigned size, const char *message ) { if ( !sameData( x, y, size ) ) { if ( message ) tracker().failedTest( file, line, message ); tracker().failedAssertSameData( file, line, xExpr, yExpr, sizeExpr, x, y, size ); TS_ABORT(); } } //#if defined(_CXXTEST_HAVE_STD) bool sameFiles( const char* file1, const char* file2, std::ostringstream& explanation) { std::string ppprev_line; std::string pprev_line; std::string prev_line; std::string curr_line; std::ifstream is1; is1.open(file1); std::ifstream is2; is2.open(file2); if (!is1) { explanation << "File '" << file1 << "' does not exist!"; return false; } if (!is2) { explanation << "File '" << file2 << "' does not exist!"; return false; } int nline=1; char c1, c2; while (1) { is1.get(c1); is2.get(c2); if (!is1 && !is2) return true; if (!is1) { explanation << "File '" << file1 << "' ended before file '" << file2 << "' (line " << nline << ")"; explanation << std::endl << "= " << ppprev_line << std::endl << "= " << pprev_line << std::endl << "= " << prev_line << std::endl << "< " << curr_line; is1.get(c1); while (is1 && (c1 != '\n')) { explanation << c1; is1.get(c1); } explanation << std::endl; return false; } if (!is2) { explanation << "File '" << file2 << "' ended before file '" << file1 << "' (line " << nline << ")"; explanation << std::endl << "= " << ppprev_line << std::endl << "= " << pprev_line << std::endl << "= " << prev_line << std::endl << "> " << curr_line; is2.get(c2); 
while (is2 && (c2 != '\n')) { explanation << c2; is2.get(c2); } explanation << std::endl; return false; } if (c1 != c2) { explanation << "Files '" << file1 << "' and '" << file2 << "' differ at line " << nline; explanation << std::endl << "= " << ppprev_line << std::endl << "= " << pprev_line << std::endl << "= " << prev_line; explanation << std::endl << "< " << curr_line; is2.get(c1); while (is1 && (c1 != '\n')) { explanation << c1; is2.get(c1); } explanation << std::endl; explanation << std::endl << "> " << curr_line; is2.get(c2); while (is2 && (c2 != '\n')) { explanation << c2; is2.get(c2); } explanation << std::endl; return false; } if (c1 == '\n') { ppprev_line = pprev_line; pprev_line = prev_line; prev_line = curr_line; curr_line = ""; nline++; } else { curr_line += c1; } } } //#endif void doAssertSameFiles( const char* file, int line, const char* file1, const char* file2, const char* message) { #if defined(_CXXTEST_HAVE_STD) std::ostringstream explanation; if ( !sameFiles( file1, file2, explanation ) ) { if ( message ) tracker().failedTest( file, line, message ); tracker().failedAssertSameFiles( file, line, file1, file2, explanation.str().c_str()); TS_ABORT(); } #else tracker().failedAssertSameFiles( file, line, file1, file2, "This test is only supported when --have-std is enabled"); TS_ABORT(); #endif } void doFailAssertThrows( const char *file, int line, const char *expr, const char *type, bool otherThrown, const char *message, const char *exception ) { if ( exception ) tracker().failedTest( file, line, exception ); if ( message ) tracker().failedTest( file, line, message ); tracker().failedAssertThrows( file, line, expr, type, otherThrown ); TS_ABORT(); } void doFailAssertThrowsNot( const char *file, int line, const char *expression, const char *message, const char *exception ) { if ( exception ) tracker().failedTest( file, line, exception ); if ( message ) tracker().failedTest( file, line, message ); tracker().failedAssertThrowsNot( file, line, 
expression ); TS_ABORT(); } } #endif // __cxxtest__TestSuite_cpp__ ================================================ FILE: cxxtest/cxxtest/TestSuite.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__TestSuite_h__ #define __cxxtest__TestSuite_h__ // // class TestSuite is the base class for all test suites. // To define a test suite, derive from this class and add // member functions called void test*(); // #include #include #include #include #include #if defined(_CXXTEST_HAVE_STD) # include #endif // _CXXTEST_HAVE_STD namespace CxxTest { class TestSuite { public: virtual ~TestSuite(); virtual void setUp(); virtual void tearDown(); }; class AbortTest {}; void doAbortTest(); # define TS_ABORT() CxxTest::doAbortTest() bool abortTestOnFail(); void setAbortTestOnFail( bool value = CXXTEST_DEFAULT_ABORT ); unsigned maxDumpSize(); void setMaxDumpSize( unsigned value = CXXTEST_MAX_DUMP_SIZE ); void doTrace( const char *file, int line, const char *message ); void doWarn( const char *file, int line, const char *message ); void doFailTest( const char *file, int line, const char *message ); void doFailAssert( const char *file, int line, const char *expression, const char *message ); template struct equals { static bool test( X x, Y y ) { return (x == y); } }; template void doAssertEquals( const char *file, int line, const char *xExpr, X x, const char *yExpr, Y y, const char *message ) { if ( !equals::test( x, y ) ) { if ( message ) tracker().failedTest( file, line, 
message ); tracker().failedAssertEquals( file, line, xExpr, yExpr, TS_AS_STRING(x), TS_AS_STRING(y) ); TS_ABORT(); } } bool sameData( const void *x, const void *y, unsigned size ); void doAssertSameData( const char *file, int line, const char *xExpr, const void *x, const char *yExpr, const void *y, const char *sizeExpr, unsigned size, const char *message ); //#if defined(_CXXTEST_HAVE_STD) bool sameFiles( const char* file1, const char* file2, std::ostringstream& explanation); //#endif template struct differs { static bool test( X x, Y y ) { return !(x == y); } }; template void doAssertDiffers( const char *file, int line, const char *xExpr, X x, const char *yExpr, Y y, const char *message ) { if ( !differs::test( x, y ) ) { if ( message ) tracker().failedTest( file, line, message ); tracker().failedAssertDiffers( file, line, xExpr, yExpr, TS_AS_STRING(x) ); TS_ABORT(); } } template struct lessThan { static bool test( X x, Y y ) { return (x < y); } }; template void doAssertLessThan( const char *file, int line, const char *xExpr, X x, const char *yExpr, Y y, const char *message ) { if ( !lessThan::test(x, y) ) { if ( message ) tracker().failedTest( file, line, message ); tracker().failedAssertLessThan( file, line, xExpr, yExpr, TS_AS_STRING(x), TS_AS_STRING(y) ); TS_ABORT(); } } template struct lessThanEquals { static bool test( X x, Y y ) { return (x <= y); } }; template void doAssertLessThanEquals( const char *file, int line, const char *xExpr, X x, const char *yExpr, Y y, const char *message ) { if ( !lessThanEquals::test( x, y ) ) { if ( message ) tracker().failedTest( file, line, message ); tracker().failedAssertLessThanEquals( file, line, xExpr, yExpr, TS_AS_STRING(x), TS_AS_STRING(y) ); TS_ABORT(); } } template void doAssertPredicate( const char *file, int line, const char *pExpr, const P &p, const char *xExpr, X x, const char *message ) { if ( !p( x ) ) { if ( message ) tracker().failedTest( file, line, message ); tracker().failedAssertPredicate( file, line, 
pExpr, xExpr, TS_AS_STRING(x) ); TS_ABORT(); } } template void doAssertRelation( const char *file, int line, const char *rExpr, const R &r, const char *xExpr, X x, const char *yExpr, Y y, const char *message ) { if ( !r( x, y ) ) { if ( message ) tracker().failedTest( file, line, message ); tracker().failedAssertRelation( file, line, rExpr, xExpr, yExpr, TS_AS_STRING(x), TS_AS_STRING(y) ); TS_ABORT(); } } // An indirection template so the compiler can determine what type // "X +/- D" should be template bool delta_le_helper( X x, Y y ) { return lessThanEquals::test(x,y); } template struct delta { static bool test( X x, Y y, D d ) { return delta_le_helper(x-d, y) && delta_le_helper(y, x+d); //(y >= x - d) && (y <= x + d)); } }; template void doAssertDelta( const char *file, int line, const char *xExpr, X x, const char *yExpr, Y y, const char *dExpr, D d, const char *message ) { if ( !delta::test( x, y, d ) ) { if ( message ) tracker().failedTest( file, line, message ); tracker().failedAssertDelta( file, line, xExpr, yExpr, dExpr, TS_AS_STRING(x), TS_AS_STRING(y), TS_AS_STRING(d) ); TS_ABORT(); } } void doFailAssertThrows( const char *file, int line, const char *expr, const char *type, bool otherThrown, const char *message, const char *exception = 0 ); void doFailAssertThrowsNot( const char *file, int line, const char *expression, const char *message, const char *exception = 0 ); void doAssertSameFiles( const char* file, int line, const char* file1, const char* file2, const char* message); # ifdef _CXXTEST_HAVE_EH # define _TS_TRY try # define _TS_CATCH_TYPE(t, b) catch t b # define _TS_CATCH_ABORT(b) _TS_CATCH_TYPE( (const CxxTest::AbortTest &), b ) # define _TS_LAST_CATCH(b) _TS_CATCH_TYPE( (...), b ) # define _TSM_LAST_CATCH(f,l,m) _TS_LAST_CATCH( { (CxxTest::tracker()).failedTest(f,l,m); TS_ABORT(); } ) # ifdef _CXXTEST_HAVE_STD # define _TS_CATCH_STD(e,b) _TS_CATCH_TYPE( (const std::exception& e), b ) # else // !_CXXTEST_HAVE_STD # define _TS_CATCH_STD(e,b) # 
endif // _CXXTEST_HAVE_STD # define ___TSM_CATCH(f,l,m) \ _TS_CATCH_STD(e, { (CxxTest::tracker()).failedTest(f,l,e.what()); TS_ABORT(); }) \ _TSM_LAST_CATCH(f,l,m) # define __TSM_CATCH(f,l,m) \ _TS_CATCH_ABORT( { throw; } ) \ ___TSM_CATCH(f,l,m) # define __TS_CATCH(f,l) __TSM_CATCH(f,l,"Unhandled exception") # define _TS_CATCH __TS_CATCH(__FILE__,__LINE__) # else // !_CXXTEST_HAVE_EH # define _TS_TRY # define ___TSM_CATCH(f,l,m) # define __TSM_CATCH(f,l,m) # define __TS_CATCH(f,l) # define _TS_CATCH # define _TS_CATCH_TYPE(t, b) # define _TS_LAST_CATCH(b) # define _TS_CATCH_STD(e,b) # define _TS_CATCH_ABORT(b) # endif // _CXXTEST_HAVE_EH // TS_TRACE # define _TS_TRACE(f,l,e) CxxTest::doTrace( (f), (l), TS_AS_STRING(e) ) # define TS_TRACE(e) _TS_TRACE( __FILE__, __LINE__, e ) // TS_WARN # define _TS_WARN(f,l,e) CxxTest::doWarn( (f), (l), TS_AS_STRING(e) ) # define TS_WARN(e) _TS_WARN( __FILE__, __LINE__, e ) // TS_FAIL # define _TS_FAIL(f,l,e) CxxTest::doFailTest( (f), (l), TS_AS_STRING(e) ) # define TS_FAIL(e) _TS_FAIL( __FILE__, __LINE__, e ) // TS_ASSERT # define ___ETS_ASSERT(f,l,e,m) { if ( !(e) ) CxxTest::doFailAssert( (f), (l), #e, (m) ); } # define ___TS_ASSERT(f,l,e,m) { _TS_TRY { ___ETS_ASSERT(f,l,e,m); } __TS_CATCH(f,l) } # define _ETS_ASSERT(f,l,e) ___ETS_ASSERT(f,l,e,0) # define _TS_ASSERT(f,l,e) ___TS_ASSERT(f,l,e,0) # define ETS_ASSERT(e) _ETS_ASSERT(__FILE__,__LINE__,e) # define TS_ASSERT(e) _TS_ASSERT(__FILE__,__LINE__,e) # define _ETSM_ASSERT(f,l,m,e) ___ETS_ASSERT(f,l,e,TS_AS_STRING(m) ) # define _TSM_ASSERT(f,l,m,e) ___TS_ASSERT(f,l,e,TS_AS_STRING(m) ) # define ETSM_ASSERT(m,e) _ETSM_ASSERT(__FILE__,__LINE__,m,e) # define TSM_ASSERT(m,e) _TSM_ASSERT(__FILE__,__LINE__,m,e) // TS_ASSERT_EQUALS # define ___ETS_ASSERT_EQUALS(f,l,x,y,m) CxxTest::doAssertEquals( (f), (l), #x, (x), #y, (y), (m) ) # define ___TS_ASSERT_EQUALS(f,l,x,y,m) { _TS_TRY { ___ETS_ASSERT_EQUALS(f,l,x,y,m); } __TS_CATCH(f,l) } # define _ETS_ASSERT_EQUALS(f,l,x,y) 
___ETS_ASSERT_EQUALS(f,l,x,y,0) # define _TS_ASSERT_EQUALS(f,l,x,y) ___TS_ASSERT_EQUALS(f,l,x,y,0) # define ETS_ASSERT_EQUALS(x,y) _ETS_ASSERT_EQUALS(__FILE__,__LINE__,x,y) # define TS_ASSERT_EQUALS(x,y) _TS_ASSERT_EQUALS(__FILE__,__LINE__,x,y) # define _ETSM_ASSERT_EQUALS(f,l,m,x,y) ___ETS_ASSERT_EQUALS(f,l,x,y,TS_AS_STRING(m)) # define _TSM_ASSERT_EQUALS(f,l,m,x,y) ___TS_ASSERT_EQUALS(f,l,x,y,TS_AS_STRING(m)) # define ETSM_ASSERT_EQUALS(m,x,y) _ETSM_ASSERT_EQUALS(__FILE__,__LINE__,m,x,y) # define TSM_ASSERT_EQUALS(m,x,y) _TSM_ASSERT_EQUALS(__FILE__,__LINE__,m,x,y) // TS_ASSERT_SAME_DATA # define ___ETS_ASSERT_SAME_DATA(f,l,x,y,s,m) CxxTest::doAssertSameData( (f), (l), #x, (x), #y, (y), #s, (s), (m) ) # define ___TS_ASSERT_SAME_DATA(f,l,x,y,s,m) { _TS_TRY { ___ETS_ASSERT_SAME_DATA(f,l,x,y,s,m); } __TS_CATCH(f,l) } # define _ETS_ASSERT_SAME_DATA(f,l,x,y,s) ___ETS_ASSERT_SAME_DATA(f,l,x,y,s,0) # define _TS_ASSERT_SAME_DATA(f,l,x,y,s) ___TS_ASSERT_SAME_DATA(f,l,x,y,s,0) # define ETS_ASSERT_SAME_DATA(x,y,s) _ETS_ASSERT_SAME_DATA(__FILE__,__LINE__,x,y,s) # define TS_ASSERT_SAME_DATA(x,y,s) _TS_ASSERT_SAME_DATA(__FILE__,__LINE__,x,y,s) # define _ETSM_ASSERT_SAME_DATA(f,l,m,x,y,s) ___ETS_ASSERT_SAME_DATA(f,l,x,y,s,TS_AS_STRING(m)) # define _TSM_ASSERT_SAME_DATA(f,l,m,x,y,s) ___TS_ASSERT_SAME_DATA(f,l,x,y,s,TS_AS_STRING(m)) # define ETSM_ASSERT_SAME_DATA(m,x,y,s) _ETSM_ASSERT_SAME_DATA(__FILE__,__LINE__,m,x,y,s) # define TSM_ASSERT_SAME_DATA(m,x,y,s) _TSM_ASSERT_SAME_DATA(__FILE__,__LINE__,m,x,y,s) // TS_ASSERT_DIFFERS # define ___ETS_ASSERT_DIFFERS(f,l,x,y,m) CxxTest::doAssertDiffers( (f), (l), #x, (x), #y, (y), (m) ) # define ___TS_ASSERT_DIFFERS(f,l,x,y,m) { _TS_TRY { ___ETS_ASSERT_DIFFERS(f,l,x,y,m); } __TS_CATCH(f,l) } # define _ETS_ASSERT_DIFFERS(f,l,x,y) ___ETS_ASSERT_DIFFERS(f,l,x,y,0) # define _TS_ASSERT_DIFFERS(f,l,x,y) ___TS_ASSERT_DIFFERS(f,l,x,y,0) # define ETS_ASSERT_DIFFERS(x,y) _ETS_ASSERT_DIFFERS(__FILE__,__LINE__,x,y) # define TS_ASSERT_DIFFERS(x,y) 
_TS_ASSERT_DIFFERS(__FILE__,__LINE__,x,y) # define _ETSM_ASSERT_DIFFERS(f,l,m,x,y) ___ETS_ASSERT_DIFFERS(f,l,x,y,TS_AS_STRING(m)) # define _TSM_ASSERT_DIFFERS(f,l,m,x,y) ___TS_ASSERT_DIFFERS(f,l,x,y,TS_AS_STRING(m)) # define ETSM_ASSERT_DIFFERS(m,x,y) _ETSM_ASSERT_DIFFERS(__FILE__,__LINE__,m,x,y) # define TSM_ASSERT_DIFFERS(m,x,y) _TSM_ASSERT_DIFFERS(__FILE__,__LINE__,m,x,y) // TS_ASSERT_LESS_THAN # define ___ETS_ASSERT_LESS_THAN(f,l,x,y,m) CxxTest::doAssertLessThan( (f), (l), #x, (x), #y, (y), (m) ) # define ___TS_ASSERT_LESS_THAN(f,l,x,y,m) { _TS_TRY { ___ETS_ASSERT_LESS_THAN(f,l,x,y,m); } __TS_CATCH(f,l) } # define _ETS_ASSERT_LESS_THAN(f,l,x,y) ___ETS_ASSERT_LESS_THAN(f,l,x,y,0) # define _TS_ASSERT_LESS_THAN(f,l,x,y) ___TS_ASSERT_LESS_THAN(f,l,x,y,0) # define ETS_ASSERT_LESS_THAN(x,y) _ETS_ASSERT_LESS_THAN(__FILE__,__LINE__,x,y) # define TS_ASSERT_LESS_THAN(x,y) _TS_ASSERT_LESS_THAN(__FILE__,__LINE__,x,y) # define _ETSM_ASSERT_LESS_THAN(f,l,m,x,y) ___ETS_ASSERT_LESS_THAN(f,l,x,y,TS_AS_STRING(m)) # define _TSM_ASSERT_LESS_THAN(f,l,m,x,y) ___TS_ASSERT_LESS_THAN(f,l,x,y,TS_AS_STRING(m)) # define ETSM_ASSERT_LESS_THAN(m,x,y) _ETSM_ASSERT_LESS_THAN(__FILE__,__LINE__,m,x,y) # define TSM_ASSERT_LESS_THAN(m,x,y) _TSM_ASSERT_LESS_THAN(__FILE__,__LINE__,m,x,y) // TS_ASSERT_LESS_THAN_EQUALS # define ___ETS_ASSERT_LESS_THAN_EQUALS(f,l,x,y,m) \ CxxTest::doAssertLessThanEquals( (f), (l), #x, (x), #y, (y), (m) ) # define ___TS_ASSERT_LESS_THAN_EQUALS(f,l,x,y,m) \ { _TS_TRY { ___ETS_ASSERT_LESS_THAN_EQUALS(f,l,x,y,m); } __TS_CATCH(f,l) } # define _ETS_ASSERT_LESS_THAN_EQUALS(f,l,x,y) ___ETS_ASSERT_LESS_THAN_EQUALS(f,l,x,y,0) # define _TS_ASSERT_LESS_THAN_EQUALS(f,l,x,y) ___TS_ASSERT_LESS_THAN_EQUALS(f,l,x,y,0) # define ETS_ASSERT_LESS_THAN_EQUALS(x,y) _ETS_ASSERT_LESS_THAN_EQUALS(__FILE__,__LINE__,x,y) # define TS_ASSERT_LESS_THAN_EQUALS(x,y) _TS_ASSERT_LESS_THAN_EQUALS(__FILE__,__LINE__,x,y) # define _ETSM_ASSERT_LESS_THAN_EQUALS(f,l,m,x,y) 
___ETS_ASSERT_LESS_THAN_EQUALS(f,l,x,y,TS_AS_STRING(m)) # define _TSM_ASSERT_LESS_THAN_EQUALS(f,l,m,x,y) ___TS_ASSERT_LESS_THAN_EQUALS(f,l,x,y,TS_AS_STRING(m)) # define ETSM_ASSERT_LESS_THAN_EQUALS(m,x,y) _ETSM_ASSERT_LESS_THAN_EQUALS(__FILE__,__LINE__,m,x,y) # define TSM_ASSERT_LESS_THAN_EQUALS(m,x,y) _TSM_ASSERT_LESS_THAN_EQUALS(__FILE__,__LINE__,m,x,y) // TS_ASSERT_PREDICATE # define ___ETS_ASSERT_PREDICATE(f,l,p,x,m) \ CxxTest::doAssertPredicate( (f), (l), #p, p(), #x, (x), (m) ) # define ___TS_ASSERT_PREDICATE(f,l,p,x,m) \ { _TS_TRY { ___ETS_ASSERT_PREDICATE(f,l,p,x,m); } __TS_CATCH(f,l) } # define _ETS_ASSERT_PREDICATE(f,l,p,x) ___ETS_ASSERT_PREDICATE(f,l,p,x,0) # define _TS_ASSERT_PREDICATE(f,l,p,x) ___TS_ASSERT_PREDICATE(f,l,p,x,0) # define ETS_ASSERT_PREDICATE(p,x) _ETS_ASSERT_PREDICATE(__FILE__,__LINE__,p,x) # define TS_ASSERT_PREDICATE(p,x) _TS_ASSERT_PREDICATE(__FILE__,__LINE__,p,x) # define _ETSM_ASSERT_PREDICATE(f,l,m,p,x) ___ETS_ASSERT_PREDICATE(f,l,p,x,TS_AS_STRING(m)) # define _TSM_ASSERT_PREDICATE(f,l,m,p,x) ___TS_ASSERT_PREDICATE(f,l,p,x,TS_AS_STRING(m)) # define ETSM_ASSERT_PREDICATE(m,p,x) _ETSM_ASSERT_PREDICATE(__FILE__,__LINE__,m,p,x) # define TSM_ASSERT_PREDICATE(m,p,x) _TSM_ASSERT_PREDICATE(__FILE__,__LINE__,m,p,x) // TS_ASSERT_RELATION # define ___ETS_ASSERT_RELATION(f,l,r,x,y,m) \ CxxTest::doAssertRelation( (f), (l), #r, r(), #x, (x), #y, (y), (m) ) # define ___TS_ASSERT_RELATION(f,l,r,x,y,m) \ { _TS_TRY { ___ETS_ASSERT_RELATION(f,l,r,x,y,m); } __TS_CATCH(f,l) } # define _ETS_ASSERT_RELATION(f,l,r,x,y) ___ETS_ASSERT_RELATION(f,l,r,x,y,0) # define _TS_ASSERT_RELATION(f,l,r,x,y) ___TS_ASSERT_RELATION(f,l,r,x,y,0) # define ETS_ASSERT_RELATION(r,x,y) _ETS_ASSERT_RELATION(__FILE__,__LINE__,r,x,y) # define TS_ASSERT_RELATION(r,x,y) _TS_ASSERT_RELATION(__FILE__,__LINE__,r,x,y) # define _ETSM_ASSERT_RELATION(f,l,m,r,x,y) ___ETS_ASSERT_RELATION(f,l,r,x,y,TS_AS_STRING(m)) # define _TSM_ASSERT_RELATION(f,l,m,r,x,y) 
___TS_ASSERT_RELATION(f,l,r,x,y,TS_AS_STRING(m)) # define ETSM_ASSERT_RELATION(m,r,x,y) _ETSM_ASSERT_RELATION(__FILE__,__LINE__,m,r,x,y) # define TSM_ASSERT_RELATION(m,r,x,y) _TSM_ASSERT_RELATION(__FILE__,__LINE__,m,r,x,y) // TS_ASSERT_DELTA # define ___ETS_ASSERT_DELTA(f,l,x,y,d,m) CxxTest::doAssertDelta( (f), (l), #x, (x), #y, (y), #d, (d), (m) ) # define ___TS_ASSERT_DELTA(f,l,x,y,d,m) { _TS_TRY { ___ETS_ASSERT_DELTA(f,l,x,y,d,m); } __TS_CATCH(f,l) } # define _ETS_ASSERT_DELTA(f,l,x,y,d) ___ETS_ASSERT_DELTA(f,l,x,y,d,0) # define _TS_ASSERT_DELTA(f,l,x,y,d) ___TS_ASSERT_DELTA(f,l,x,y,d,0) # define ETS_ASSERT_DELTA(x,y,d) _ETS_ASSERT_DELTA(__FILE__,__LINE__,x,y,d) # define TS_ASSERT_DELTA(x,y,d) _TS_ASSERT_DELTA(__FILE__,__LINE__,x,y,d) # define _ETSM_ASSERT_DELTA(f,l,m,x,y,d) ___ETS_ASSERT_DELTA(f,l,x,y,d,TS_AS_STRING(m)) # define _TSM_ASSERT_DELTA(f,l,m,x,y,d) ___TS_ASSERT_DELTA(f,l,x,y,d,TS_AS_STRING(m)) # define ETSM_ASSERT_DELTA(m,x,y,d) _ETSM_ASSERT_DELTA(__FILE__,__LINE__,m,x,y,d) # define TSM_ASSERT_DELTA(m,x,y,d) _TSM_ASSERT_DELTA(__FILE__,__LINE__,m,x,y,d) // TS_ASSERT_SAME_FILES # define ___ETS_ASSERT_SAME_FILES(f,l,x,y,m) CxxTest::doAssertSameFiles( (f), (l), (x), (y), (m) ) # define ___TS_ASSERT_SAME_FILES(f,l,x,y,m) { _TS_TRY { ___ETS_ASSERT_SAME_FILES(f,l,x,y,m); } __TS_CATCH(f,l) } # define _ETS_ASSERT_SAME_FILES(f,l,x,y) ___ETS_ASSERT_SAME_FILES(f,l,x,y,0) # define _TS_ASSERT_SAME_FILES(f,l,x,y) ___TS_ASSERT_SAME_FILES(f,l,x,y,0) # define ETS_ASSERT_SAME_FILES(x,y) _ETS_ASSERT_SAME_FILES(__FILE__,__LINE__,x,y) # define TS_ASSERT_SAME_FILES(x,y) _TS_ASSERT_SAME_FILES(__FILE__,__LINE__,x,y) # define _ETSM_ASSERT_SAME_FILES(f,l,m,x,y) ___ETS_ASSERT_SAME_FILES(f,l,x,y,TS_AS_STRING(m)) # define _TSM_ASSERT_SAME_FILES(f,l,m,x,y) ___TS_ASSERT_SAME_FILES(f,l,x,y,TS_AS_STRING(m)) # define ETSM_ASSERT_SAME_FILES(m,x,y) _ETSM_ASSERT_SAME_FILES(__FILE__,__LINE__,m,x,y) # define TSM_ASSERT_SAME_FILES(m,x,y) _TSM_ASSERT_SAME_FILES(__FILE__,__LINE__,m,x,y) // 
TS_ASSERT_THROWS # define ___TS_ASSERT_THROWS(f,l,e,t,m) ___TS_ASSERT_THROWS_ASSERT(f,l,e,t,(void)0,m) # define _TS_ASSERT_THROWS(f,l,e,t) ___TS_ASSERT_THROWS(f,l,e,t,0) # define TS_ASSERT_THROWS(e,t) _TS_ASSERT_THROWS(__FILE__,__LINE__,e,t) # define _TSM_ASSERT_THROWS(f,l,m,e,t) ___TS_ASSERT_THROWS(f,l,e,t,TS_AS_STRING(m)) # define TSM_ASSERT_THROWS(m,e,t) _TSM_ASSERT_THROWS(__FILE__,__LINE__,m,e,t) // TS_ASSERT_THROWS_ASSERT # define ___TS_ASSERT_THROWS_ASSERT(f,l,e,t,a,m) { \ bool _ts_threw_expected = false, _ts_threw_else = false; \ _TS_TRY { e; } \ _TS_CATCH_TYPE( (t), { a; _ts_threw_expected = true; } ) \ _TS_CATCH_ABORT( { throw; } ) \ _TS_CATCH_STD( ex, { _ts_threw_expected = true; CxxTest::doFailAssertThrows((f), (l), #e, #t, true, (m), ex.what() ); } ) \ _TS_LAST_CATCH( { _ts_threw_else = true; } ) \ if ( !_ts_threw_expected ) { CxxTest::doFailAssertThrows( (f), (l), #e, #t, _ts_threw_else, (m), 0 ); } } # define _TS_ASSERT_THROWS_ASSERT(f,l,e,t,a) ___TS_ASSERT_THROWS_ASSERT(f,l,e,t,a,0) # define TS_ASSERT_THROWS_ASSERT(e,t,a) _TS_ASSERT_THROWS_ASSERT(__FILE__,__LINE__,e,t,a) # define _TSM_ASSERT_THROWS_ASSERT(f,l,m,e,t,a) ___TS_ASSERT_THROWS_ASSERT(f,l,e,t,a,TS_AS_STRING(m)) # define TSM_ASSERT_THROWS_ASSERT(m,e,t,a) _TSM_ASSERT_THROWS_ASSERT(__FILE__,__LINE__,m,e,t,a) // TS_ASSERT_THROWS_EQUALS # define TS_ASSERT_THROWS_EQUALS(e,t,x,y) TS_ASSERT_THROWS_ASSERT(e,t,TS_ASSERT_EQUALS(x,y)) # define TSM_ASSERT_THROWS_EQUALS(m,e,t,x,y) TSM_ASSERT_THROWS_ASSERT(m,e,t,TSM_ASSERT_EQUALS(m,x,y)) // TS_ASSERT_THROWS_DIFFERS # define TS_ASSERT_THROWS_DIFFERS(e,t,x,y) TS_ASSERT_THROWS_ASSERT(e,t,TS_ASSERT_DIFFERS(x,y)) # define TSM_ASSERT_THROWS_DIFFERS(m,e,t,x,y) TSM_ASSERT_THROWS_ASSERT(m,e,t,TSM_ASSERT_DIFFERS(m,x,y)) // TS_ASSERT_THROWS_DELTA # define TS_ASSERT_THROWS_DELTA(e,t,x,y,d) TS_ASSERT_THROWS_ASSERT(e,t,TS_ASSERT_DELTA(x,y,d)) # define TSM_ASSERT_THROWS_DELTA(m,e,t,x,y,d) TSM_ASSERT_THROWS_ASSERT(m,e,t,TSM_ASSERT_DELTA(m,x,y,d)) // 
TS_ASSERT_THROWS_SAME_DATA # define TS_ASSERT_THROWS_SAME_DATA(e,t,x,y,s) TS_ASSERT_THROWS_ASSERT(e,t,TS_ASSERT_SAME_DATA(x,y,s)) # define TSM_ASSERT_THROWS_SAME_DATA(m,e,t,x,y,s) TSM_ASSERT_THROWS_ASSERT(m,e,t,TSM_ASSERT_SAME_DATA(m,x,y,s)) // TS_ASSERT_THROWS_LESS_THAN # define TS_ASSERT_THROWS_LESS_THAN(e,t,x,y) TS_ASSERT_THROWS_ASSERT(e,t,TS_ASSERT_LESS_THAN(x,y)) # define TSM_ASSERT_THROWS_LESS_THAN(m,e,t,x,y) TSM_ASSERT_THROWS_ASSERT(m,e,t,TSM_ASSERT_LESS_THAN(m,x,y)) // TS_ASSERT_THROWS_LESS_THAN_EQUALS # define TS_ASSERT_THROWS_LESS_THAN_EQUALS(e,t,x,y) TS_ASSERT_THROWS_ASSERT(e,t,TS_ASSERT_LESS_THAN_EQUALS(x,y)) # define TSM_ASSERT_THROWS_LESS_THAN_EQUALS(m,e,t,x,y) TSM_ASSERT_THROWS_ASSERT(m,e,t,TSM_ASSERT_LESS_THAN_EQUALS(m,x,y)) // TS_ASSERT_THROWS_PREDICATE # define TS_ASSERT_THROWS_PREDICATE(e,t,p,v) TS_ASSERT_THROWS_ASSERT(e,t,TS_ASSERT_PREDICATE(p,v)) # define TSM_ASSERT_THROWS_PREDICATE(m,e,t,p,v) TSM_ASSERT_THROWS_ASSERT(m,e,t,TSM_ASSERT_PREDICATE(m,p,v)) // TS_ASSERT_THROWS_RELATION # define TS_ASSERT_THROWS_RELATION(e,t,r,x,y) TS_ASSERT_THROWS_ASSERT(e,t,TS_ASSERT_RELATION(r,x,y)) # define TSM_ASSERT_THROWS_RELATION(m,e,t,r,x,y) TSM_ASSERT_THROWS_ASSERT(m,e,t,TSM_ASSERT_RELATION(m,r,x,y)) // TS_ASSERT_THROWS_ANYTHING # define ___TS_ASSERT_THROWS_ANYTHING(f,l,e,m) { \ bool _ts_threw = false; \ _TS_TRY { e; } \ _TS_LAST_CATCH( { _ts_threw = true; } ) \ if ( !_ts_threw ) { CxxTest::doFailAssertThrows( (f), (l), #e, "...", false, (m) ); } } # define _TS_ASSERT_THROWS_ANYTHING(f,l,e) ___TS_ASSERT_THROWS_ANYTHING(f,l,e,0) # define TS_ASSERT_THROWS_ANYTHING(e) _TS_ASSERT_THROWS_ANYTHING(__FILE__, __LINE__, e) # define _TSM_ASSERT_THROWS_ANYTHING(f,l,m,e) ___TS_ASSERT_THROWS_ANYTHING(f,l,e,TS_AS_STRING(m)) # define TSM_ASSERT_THROWS_ANYTHING(m,e) _TSM_ASSERT_THROWS_ANYTHING(__FILE__,__LINE__,m,e) // TS_ASSERT_THROWS_NOTHING # define ___TS_ASSERT_THROWS_NOTHING(f,l,e,m) { \ _TS_TRY { e; } \ _TS_CATCH_ABORT( { throw; } ) \ _TS_CATCH_STD(ex, { 
CxxTest::doFailAssertThrowsNot( (f), (l), #e, (m), ex.what() ); } ) \ _TS_LAST_CATCH( { CxxTest::doFailAssertThrowsNot( (f), (l), #e, (m), 0 ); } ) } # define _TS_ASSERT_THROWS_NOTHING(f,l,e) ___TS_ASSERT_THROWS_NOTHING(f,l,e,0) # define TS_ASSERT_THROWS_NOTHING(e) _TS_ASSERT_THROWS_NOTHING(__FILE__,__LINE__,e) # define _TSM_ASSERT_THROWS_NOTHING(f,l,m,e) ___TS_ASSERT_THROWS_NOTHING(f,l,e,TS_AS_STRING(m)) # define TSM_ASSERT_THROWS_NOTHING(m,e) _TSM_ASSERT_THROWS_NOTHING(__FILE__,__LINE__,m,e) // // This takes care of "signed <-> unsigned" warnings // # define CXXTEST_COMPARISONS(CXXTEST_X, CXXTEST_Y, CXXTEST_T) \ template<> struct equals { \ static bool test(CXXTEST_X x,CXXTEST_Y y) { \ return equals::test((CXXTEST_T)x,(CXXTEST_T)y); } }; \ template<> struct equals { \ static bool test(CXXTEST_Y x,CXXTEST_X y) { \ return equals::test((CXXTEST_T)x,(CXXTEST_T)y); } }; \ template<> struct differs { \ static bool test(CXXTEST_X x,CXXTEST_Y y) { \ return differs::test((CXXTEST_T)x,(CXXTEST_T)y); } }; \ template<> struct differs { \ static bool test(CXXTEST_Y x,CXXTEST_X y) { \ return differs::test((CXXTEST_T)x,(CXXTEST_T)y); } }; \ template<> struct lessThan { \ static bool test(CXXTEST_X x,CXXTEST_Y y) { \ return lessThan::test((CXXTEST_T)x,(CXXTEST_T)y); } }; \ template<> struct lessThan { \ static bool test(CXXTEST_Y x,CXXTEST_X y) { \ return lessThan::test((CXXTEST_T)x,(CXXTEST_T)y); } }; \ template<> struct lessThanEquals { \ static bool test(CXXTEST_X x,CXXTEST_Y y) { \ return lessThanEquals::test((CXXTEST_T)x,(CXXTEST_T)y); } }; \ template<> struct lessThanEquals { \ static bool test(CXXTEST_Y x,CXXTEST_X y) { \ return lessThanEquals::test((CXXTEST_T)x,(CXXTEST_T)y); } } #if 0 // These specializations are not needed because delta makes use of // CxxTest::lessThanEquals for the actual comparison template struct delta { \ static bool test(CXXTEST_X x,CXXTEST_Y y, D d) { \ return delta::test((CXXTEST_T)x,(CXXTEST_T)y, d); } }; \ template struct delta { \ static 
bool test(CXXTEST_Y x,CXXTEST_X y, D d) { \ return delta::test((CXXTEST_T)x,(CXXTEST_T)y, d); } } #endif # define CXXTEST_INTEGRAL(CXXTEST_T) \ CXXTEST_COMPARISONS( signed CXXTEST_T, unsigned CXXTEST_T, unsigned CXXTEST_T ) CXXTEST_INTEGRAL( char ); CXXTEST_INTEGRAL( short ); CXXTEST_INTEGRAL( int ); CXXTEST_INTEGRAL( long ); # ifdef _CXXTEST_LONGLONG CXXTEST_INTEGRAL( _CXXTEST_LONGLONG ); # endif // _CXXTEST_LONGLONG # define CXXTEST_SMALL_BIG(CXXTEST_SMALL, CXXTEST_BIG) \ CXXTEST_COMPARISONS( signed CXXTEST_SMALL, unsigned CXXTEST_BIG, unsigned CXXTEST_BIG ); \ CXXTEST_COMPARISONS( signed CXXTEST_BIG, unsigned CXXTEST_SMALL, unsigned CXXTEST_BIG ) CXXTEST_SMALL_BIG( char, short ); CXXTEST_SMALL_BIG( char, int ); CXXTEST_SMALL_BIG( short, int ); CXXTEST_SMALL_BIG( char, long ); CXXTEST_SMALL_BIG( short, long ); CXXTEST_SMALL_BIG( int, long ); # ifdef _CXXTEST_LONGLONG CXXTEST_SMALL_BIG( char, _CXXTEST_LONGLONG ); CXXTEST_SMALL_BIG( short, _CXXTEST_LONGLONG ); CXXTEST_SMALL_BIG( int, _CXXTEST_LONGLONG ); CXXTEST_SMALL_BIG( long, _CXXTEST_LONGLONG ); # endif // _CXXTEST_LONGLONG } #ifdef _CXXTEST_HAVE_STD # include #endif // _CXXTEST_HAVE_STD #endif // __cxxtest__TestSuite_h__ ================================================ FILE: cxxtest/cxxtest/TestTracker.cpp ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __cxxtest__TestTracker_cpp__ #define __cxxtest__TestTracker_cpp__ #include namespace CxxTest { bool TestTracker::_created = false; bool TestTracker::print_tracing = false; TestTracker::TestTracker() { if ( !_created ) { initialize(); setListener( 0 ); _created = true; } } TestTracker::~TestTracker() { } TestTracker & TestTracker::tracker() { static TestTracker theTracker; return theTracker; } void TestTracker::initialize() { _warnings = 0; _failedTests = 0; _testFailedAsserts = 0; _suiteFailedTests = 0; _failedSuites = 0; _world = 0; _suite = 0; _test = 0; } const TestDescription *TestTracker::fixTest( const TestDescription *d ) const { return d ? d : &dummyTest(); } const SuiteDescription *TestTracker::fixSuite( const SuiteDescription *d ) const { return d ? d : &dummySuite(); } const WorldDescription *TestTracker::fixWorld( const WorldDescription *d ) const { return d ? d : &dummyWorld(); } const TestDescription &TestTracker::dummyTest() const { return dummySuite().testDescription(0); } const SuiteDescription &TestTracker::dummySuite() const { return dummyWorld().suiteDescription(0); } const WorldDescription &TestTracker::dummyWorld() const { return _dummyWorld; } void TestTracker::setListener( TestListener *l ) { _l = l ? 
l : &_dummyListener; } void TestTracker::enterWorld( const WorldDescription &wd ) { setWorld( &wd ); _warnings = _failedTests = _testFailedAsserts = _suiteFailedTests = _failedSuites = 0; _l->enterWorld( wd ); } void TestTracker::enterSuite( const SuiteDescription &sd ) { setSuite( &sd ); _testFailedAsserts = _suiteFailedTests = 0; _l->enterSuite(sd); } void TestTracker::enterTest( const TestDescription &td ) { setTest( &td ); _testFailedAsserts = false; _l->enterTest(td); } void TestTracker::leaveTest( const TestDescription &td ) { _l->leaveTest( td ); setTest( 0 ); } void TestTracker::leaveSuite( const SuiteDescription &sd ) { _l->leaveSuite( sd ); setSuite( 0 ); } void TestTracker::leaveWorld( const WorldDescription &wd ) { _l->leaveWorld( wd ); setWorld( 0 ); } void TestTracker::trace( const char *file, int line, const char *expression ) { _l->trace( file, line, expression ); } void TestTracker::warning( const char *file, int line, const char *expression ) { countWarning(); _l->warning( file, line, expression ); } void TestTracker::failedTest( const char *file, int line, const char *expression ) { countFailure(); _l->failedTest( file, line, expression ); } void TestTracker::failedAssert( const char *file, int line, const char *expression ) { countFailure(); _l->failedAssert( file, line, expression ); } void TestTracker::failedAssertEquals( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ) { countFailure(); _l->failedAssertEquals( file, line, xStr, yStr, x, y ); } void TestTracker::failedAssertSameData( const char *file, int line, const char *xStr, const char *yStr, const char *sizeStr, const void *x, const void *y, unsigned size ) { countFailure(); _l->failedAssertSameData( file, line, xStr, yStr, sizeStr, x, y, size ); } void TestTracker::failedAssertDelta( const char *file, int line, const char *xStr, const char *yStr, const char *dStr, const char *x, const char *y, const char *d ) { countFailure(); 
_l->failedAssertDelta( file, line, xStr, yStr, dStr, x, y, d ); } void TestTracker::failedAssertDiffers( const char *file, int line, const char *xStr, const char *yStr, const char *value ) { countFailure(); _l->failedAssertDiffers( file, line, xStr, yStr, value ); } void TestTracker::failedAssertLessThan( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ) { countFailure(); _l->failedAssertLessThan( file, line, xStr, yStr, x, y ); } void TestTracker::failedAssertLessThanEquals( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ) { countFailure(); _l->failedAssertLessThanEquals( file, line, xStr, yStr, x, y ); } void TestTracker::failedAssertPredicate( const char *file, int line, const char *predicate, const char *xStr, const char *x ) { countFailure(); _l->failedAssertPredicate( file, line, predicate, xStr, x ); } void TestTracker::failedAssertRelation( const char *file, int line, const char *relation, const char *xStr, const char *yStr, const char *x, const char *y ) { countFailure(); _l->failedAssertRelation( file, line, relation, xStr, yStr, x, y ); } void TestTracker::failedAssertThrows( const char *file, int line, const char *expression, const char *type, bool otherThrown ) { countFailure(); _l->failedAssertThrows( file, line, expression, type, otherThrown ); } void TestTracker::failedAssertThrowsNot( const char *file, int line, const char *expression ) { countFailure(); _l->failedAssertThrowsNot( file, line, expression ); } void TestTracker::failedAssertSameFiles( const char *file, int line, const char *file1, const char* file2, const char* explanation ) { countFailure(); _l->failedAssertSameFiles( file, line, file1, file2, explanation ); } void TestTracker::setWorld( const WorldDescription *w ) { _world = fixWorld( w ); setSuite( 0 ); } void TestTracker::setSuite( const SuiteDescription *s ) { _suite = fixSuite( s ); setTest( 0 ); } void TestTracker::setTest( const 
TestDescription *t ) { _test = fixTest( t ); } void TestTracker::countWarning() { ++ _warnings; } void TestTracker::countFailure() { if ( ++ _testFailedAsserts == 1 ) { ++ _failedTests; if ( ++ _suiteFailedTests == 1 ) ++ _failedSuites; } } } #endif // __cxxtest__TestTracker_cpp__ ================================================ FILE: cxxtest/cxxtest/TestTracker.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__TestTracker_h__ #define __cxxtest__TestTracker_h__ // // The TestTracker tracks running tests // The actual work is done in CountingListenerProxy, // but this way avoids cyclic references TestListener<->CountingListenerProxy // #include #include namespace CxxTest { class TestListener; class TestTracker : public TestListener { public: virtual ~TestTracker(); static TestTracker &tracker(); static bool print_tracing; const TestDescription *fixTest( const TestDescription *d ) const; const SuiteDescription *fixSuite( const SuiteDescription *d ) const; const WorldDescription *fixWorld( const WorldDescription *d ) const; const TestDescription &test() const { return *_test; } const SuiteDescription &suite() const { return *_suite; } const WorldDescription &world() const { return *_world; } bool testFailed() const { return (testFailedAsserts() > 0); } bool suiteFailed() const { return (suiteFailedTests() > 0); } bool worldFailed() const { return (failedSuites() > 0); } unsigned warnings() const { return _warnings; } unsigned failedTests() const { return 
_failedTests; } unsigned testFailedAsserts() const { return _testFailedAsserts; } unsigned suiteFailedTests() const { return _suiteFailedTests; } unsigned failedSuites() const { return _failedSuites; } void enterWorld( const WorldDescription &wd ); void enterSuite( const SuiteDescription &sd ); void enterTest( const TestDescription &td ); void leaveTest( const TestDescription &td ); void leaveSuite( const SuiteDescription &sd ); void leaveWorld( const WorldDescription &wd ); void trace( const char *file, int line, const char *expression ); void warning( const char *file, int line, const char *expression ); void failedTest( const char *file, int line, const char *expression ); void failedAssert( const char *file, int line, const char *expression ); void failedAssertEquals( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ); void failedAssertSameData( const char *file, int line, const char *xStr, const char *yStr, const char *sizeStr, const void *x, const void *y, unsigned size ); void failedAssertDelta( const char *file, int line, const char *xStr, const char *yStr, const char *dStr, const char *x, const char *y, const char *d ); void failedAssertDiffers( const char *file, int line, const char *xStr, const char *yStr, const char *value ); void failedAssertLessThan( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ); void failedAssertLessThanEquals( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ); void failedAssertPredicate( const char *file, int line, const char *predicate, const char *xStr, const char *x ); void failedAssertRelation( const char *file, int line, const char *relation, const char *xStr, const char *yStr, const char *x, const char *y ); void failedAssertThrows( const char *file, int line, const char *expression, const char *type, bool otherThrown ); void failedAssertThrowsNot( const char *file, int line, const char 
*expression ); void failedAssertSameFiles( const char* file, int line, const char* file1, const char* file2, const char* explanation); void initialize(); private: TestTracker( const TestTracker & ); TestTracker &operator=( const TestTracker & ); static bool _created; TestListener _dummyListener; DummyWorldDescription _dummyWorld; unsigned _warnings, _failedTests, _testFailedAsserts, _suiteFailedTests, _failedSuites; TestListener *_l; const WorldDescription *_world; const SuiteDescription *_suite; const TestDescription *_test; const TestDescription &dummyTest() const; const SuiteDescription &dummySuite() const; const WorldDescription &dummyWorld() const; void setWorld( const WorldDescription *w ); void setSuite( const SuiteDescription *s ); void setTest( const TestDescription *t ); void countWarning(); void countFailure(); friend class TestRunner; TestTracker(); void setListener( TestListener *l ); }; inline TestTracker &tracker() { return TestTracker::tracker(); } } #endif // __cxxtest__TestTracker_h__ ================================================ FILE: cxxtest/cxxtest/ValueTraits.cpp ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __cxxtest__ValueTraits_cpp__ #define __cxxtest__ValueTraits_cpp__ #include namespace CxxTest { // // Non-inline functions from ValueTraits.h // char digitToChar( unsigned digit ) { if ( digit < 10 ) return (char)('0' + digit); if ( digit <= 10 + 'Z' - 'A' ) return (char)('A' + digit - 10); return '?'; } const char *byteToHex( unsigned char byte ) { static char asHex[3]; asHex[0] = digitToChar( byte >> 4 ); asHex[1] = digitToChar( byte & 0x0F ); asHex[2] = '\0'; return asHex; } char *copyString( char *dst, const char *src ) { while ( (*dst = *src) != '\0' ) { ++ dst; ++ src; } return dst; } bool stringsEqual( const char *s1, const char *s2 ) { char c; while ( (c = *s1++) == *s2++ ) if ( c == '\0' ) return true; return false; } char *charToString( unsigned long c, char *s ) { switch( c ) { case '\\': return copyString( s, "\\\\" ); case '\"': return copyString( s, "\\\"" ); case '\'': return copyString( s, "\\\'" ); case '\0': return copyString( s, "\\0" ); case '\a': return copyString( s, "\\a" ); case '\b': return copyString( s, "\\b" ); case '\n': return copyString( s, "\\n" ); case '\r': return copyString( s, "\\r" ); case '\t': return copyString( s, "\\t" ); } if ( c >= 32 && c <= 127 ) { s[0] = (char)c; s[1] = '\0'; return s + 1; } else { s[0] = '\\'; s[1] = 'x'; if ( c < 0x10 ) { s[2] = '0'; ++ s; } return numberToString( c, s + 2, 16UL ); } } char *charToString( char c, char *s ) { return charToString( (unsigned long)(unsigned char)c, s ); } char *bytesToString( const unsigned char *bytes, unsigned numBytes, unsigned maxBytes, char *s ) { bool truncate = (numBytes > maxBytes); if ( truncate ) numBytes = maxBytes; s = copyString( s, "{ " ); for ( unsigned i = 0; i < numBytes; ++ i, ++ bytes ) s = copyString( copyString( s, byteToHex( *bytes ) ), " " ); if ( truncate ) s = copyString( s, "..." 
); return copyString( s, " }" ); } #ifndef CXXTEST_USER_VALUE_TRAITS unsigned ValueTraits::requiredDigitsOnLeft( double t ) { unsigned digits = 1; for ( t = (t < 0.0) ? -t : t; t > 1.0; t /= BASE ) ++ digits; return digits; } char *ValueTraits::doNegative( double &t ) { if ( t >= 0 ) return _asString; _asString[0] = '-'; t = -t; return _asString + 1; } void ValueTraits::hugeNumber( double t ) { char *s = doNegative( t ); s = doubleToString( t, s, 0, 1 ); s = copyString( s, "." ); s = doubleToString( t, s, 1, DIGITS_ON_RIGHT ); s = copyString( s, "E" ); s = numberToString( requiredDigitsOnLeft( t ) - 1, s ); } void ValueTraits::normalNumber( double t ) { char *s = doNegative( t ); s = doubleToString( t, s ); s = copyString( s, "." ); for ( unsigned i = 0; i < DIGITS_ON_RIGHT; ++ i ) s = numberToString( (unsigned)(t *= BASE) % BASE, s ); } void ValueTraits::nonFiniteNumber( double t ) { char *s = _asString; if ( t != t ) s = copyString( s, "nan" ); //else if ( t == 1.0/0.0 ) else if ( t >= HUGE_VAL ) s = copyString( s, "-inf" ); else if ( t <= -HUGE_VAL ) //else if ( t == -1.0/0.0 ) s = copyString( s, "inf" ); } char *ValueTraits::doubleToString( double t, char *s, unsigned skip, unsigned max ) { return numberToString( t, s, BASE, skip, max ); } #endif // !CXXTEST_USER_VALUE_TRAITS } #endif // __cxxtest__ValueTraits_cpp__ ================================================ FILE: cxxtest/cxxtest/ValueTraits.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */ #ifndef __cxxtest__ValueTraits_h__ #define __cxxtest__ValueTraits_h__ // // ValueTraits are used by CxxTest to convert arbitrary // values used in TS_ASSERT_EQUALS() to a string representation. // // This header file contains value traits for builtin integral types. // To declare value traits for new types you should instantiate the class // ValueTraits. // #include #ifdef _CXXTEST_OLD_TEMPLATE_SYNTAX # define CXXTEST_TEMPLATE_INSTANTIATION #else // !_CXXTEST_OLD_TEMPLATE_SYNTAX # define CXXTEST_TEMPLATE_INSTANTIATION template<> #endif // _CXXTEST_OLD_TEMPLATE_SYNTAX #ifdef _CXXTEST_HAVE_STD #include #else #include #endif namespace CxxTest { // // This is how we use the value traits // # define TS_AS_STRING(x) CxxTest::traits(x).asString() // // Char representation of a digit // char digitToChar( unsigned digit ); // // Convert byte value to hex digits // Returns pointer to internal buffer // const char *byteToHex( unsigned char byte ); // // Convert byte values to string // Returns one past the copied data // char *bytesToString( const unsigned char *bytes, unsigned numBytes, unsigned maxBytes, char *s ); // // Copy a string. // Returns one past the end of the destination string // Remember -- we can't use the standard library! // char *copyString( char *dst, const char *src ); // // Compare two strings. // Remember -- we can't use the standard library! 
// bool stringsEqual( const char *s1, const char *s2 ); // // Represent a character value as a string // Returns one past the end of the string // This will be the actual char if printable or '\xXXXX' otherwise // char *charToString( unsigned long c, char *s ); // // Prevent problems with negative (signed char)s // char *charToString( char c, char *s ); // // The default ValueTraits class dumps up to 8 bytes as hex values // template class ValueTraits { enum { MAX_BYTES = 8 }; char _asString[sizeof("{ ") + sizeof("XX ") * MAX_BYTES + sizeof("... }")]; public: ValueTraits( const T &t ) { bytesToString( (const unsigned char *)&t, sizeof(T), MAX_BYTES, _asString ); } const char *asString( void ) const { return _asString; } }; // // traits( T t ) // Creates an object of type ValueTraits // template inline ValueTraits traits( T t ) { return ValueTraits( t ); } // // You can duplicate the implementation of an existing ValueTraits // # define CXXTEST_COPY_TRAITS(CXXTEST_NEW_CLASS, CXXTEST_OLD_CLASS) \ CXXTEST_TEMPLATE_INSTANTIATION \ class ValueTraits< CXXTEST_NEW_CLASS > \ { \ ValueTraits< CXXTEST_OLD_CLASS > _old; \ public: \ ValueTraits( CXXTEST_NEW_CLASS n ) : _old( (CXXTEST_OLD_CLASS)n ) {} \ const char *asString( void ) const { return _old.asString(); } \ } // // Certain compilers need separate declarations for T and const T // # ifdef _CXXTEST_NO_COPY_CONST # define CXXTEST_COPY_CONST_TRAITS(CXXTEST_CLASS) # else // !_CXXTEST_NO_COPY_CONST # define CXXTEST_COPY_CONST_TRAITS(CXXTEST_CLASS) CXXTEST_COPY_TRAITS(CXXTEST_CLASS, const CXXTEST_CLASS) # endif // _CXXTEST_NO_COPY_CONST // // Avoid compiler warnings about unsigned types always >= 0 // template inline bool negative( N n ) { return n < 0; } template inline N abs( N n ) { return negative(n) ? 
-n : n; } # define CXXTEST_NON_NEGATIVE(Type) \ CXXTEST_TEMPLATE_INSTANTIATION \ inline bool negative( Type ) { return false; } \ CXXTEST_TEMPLATE_INSTANTIATION \ inline Type abs( Type value ) { return value; } CXXTEST_NON_NEGATIVE( bool ) CXXTEST_NON_NEGATIVE( unsigned char ) CXXTEST_NON_NEGATIVE( unsigned short int ) CXXTEST_NON_NEGATIVE( unsigned int ) CXXTEST_NON_NEGATIVE( unsigned long int ) # ifdef _CXXTEST_LONGLONG CXXTEST_NON_NEGATIVE( unsigned _CXXTEST_LONGLONG ) # endif // _CXXTEST_LONGLONG // // Represent (integral) number as a string // Returns one past the end of the string // Remember -- we can't use the standard library! // template char *numberToString( N n, char *s, N base = 10, unsigned skipDigits = 0, unsigned maxDigits = (unsigned)-1 ) { if ( negative(n) ) { *s++ = '-'; n = abs(n); } N digit = 1; while ( digit <= (n / base) ) digit *= base; N digitValue; for ( ; digit >= 1 && skipDigits; n -= digit * digitValue, digit /= base, -- skipDigits ) digitValue = (unsigned)(n / digit); for ( ; digit >= 1 && maxDigits; n -= digit * digitValue, digit /= base, -- maxDigits ) *s++ = digitToChar( (unsigned)(digitValue = (unsigned)(n / digit)) ); *s = '\0'; return s; } // // All the specific ValueTraits follow. 
// You can #define CXXTEST_USER_VALUE_TRAITS if you don't want them // #ifndef CXXTEST_USER_VALUE_TRAITS // // ValueTraits: const char * const & // This is used for printing strings, as in TS_FAIL( "Message" ) // CXXTEST_TEMPLATE_INSTANTIATION class ValueTraits { ValueTraits &operator=( const ValueTraits & ); const char *_asString; public: ValueTraits( const char * const &value ) : _asString( value ) {} ValueTraits( const ValueTraits &other ) : _asString( other._asString ) {} const char *asString( void ) const { return _asString; } }; CXXTEST_COPY_TRAITS( const char *, const char * const & ); CXXTEST_COPY_TRAITS( char *, const char * const & ); // // ValueTraits: bool // CXXTEST_TEMPLATE_INSTANTIATION class ValueTraits { bool _value; public: ValueTraits( const bool value ) : _value( value ) {} const char *asString( void ) const { return _value ? "true" : "false"; } }; CXXTEST_COPY_CONST_TRAITS( bool ); # ifdef _CXXTEST_LONGLONG // // ValueTraits: signed long long // CXXTEST_TEMPLATE_INSTANTIATION class ValueTraits { typedef _CXXTEST_LONGLONG T; char _asString[2 + 3 * sizeof(T)]; public: ValueTraits( T t ) { numberToString( t, _asString ); } const char *asString( void ) const { return _asString; } }; CXXTEST_COPY_CONST_TRAITS( signed _CXXTEST_LONGLONG ); // // ValueTraits: unsigned long long // CXXTEST_TEMPLATE_INSTANTIATION class ValueTraits { typedef unsigned _CXXTEST_LONGLONG T; char _asString[1 + 3 * sizeof(T)]; public: ValueTraits( T t ) { numberToString( t, _asString ); } const char *asString( void ) const { return _asString; } }; CXXTEST_COPY_CONST_TRAITS( unsigned _CXXTEST_LONGLONG ); # endif // _CXXTEST_LONGLONG // // ValueTraits: signed long // CXXTEST_TEMPLATE_INSTANTIATION class ValueTraits { typedef signed long int T; char _asString[2 + 3 * sizeof(T)]; public: ValueTraits( T t ) { numberToString( t, _asString ); } const char *asString( void ) const { return _asString; } }; CXXTEST_COPY_CONST_TRAITS( signed long int ); // // ValueTraits: unsigned long // 
CXXTEST_TEMPLATE_INSTANTIATION class ValueTraits { typedef unsigned long int T; char _asString[1 + 3 * sizeof(T)]; public: ValueTraits( T t ) { numberToString( t, _asString ); } const char *asString( void ) const { return _asString; } }; CXXTEST_COPY_CONST_TRAITS( unsigned long int ); // // All decimals are the same as the long version // CXXTEST_COPY_TRAITS( const signed int, const signed long int ); CXXTEST_COPY_TRAITS( const unsigned int, const unsigned long int ); CXXTEST_COPY_TRAITS( const signed short int, const signed long int ); CXXTEST_COPY_TRAITS( const unsigned short int, const unsigned long int ); CXXTEST_COPY_TRAITS( const unsigned char, const unsigned long int ); CXXTEST_COPY_CONST_TRAITS( signed int ); CXXTEST_COPY_CONST_TRAITS( unsigned int ); CXXTEST_COPY_CONST_TRAITS( signed short int ); CXXTEST_COPY_CONST_TRAITS( unsigned short int ); CXXTEST_COPY_CONST_TRAITS( unsigned char ); // // ValueTraits: char // Returns 'x' for printable chars, '\x??' for others // CXXTEST_TEMPLATE_INSTANTIATION class ValueTraits { char _asString[sizeof("'\\xXX'")]; public: ValueTraits( char c ) { copyString( charToString( c, copyString( _asString, "'" ) ), "'" ); } const char *asString( void ) const { return _asString; } }; CXXTEST_COPY_CONST_TRAITS( char ); // // ValueTraits: signed char // Same as char, some compilers need it // CXXTEST_COPY_TRAITS( const signed char, const char ); CXXTEST_COPY_CONST_TRAITS( signed char ); // // ValueTraits: double // CXXTEST_TEMPLATE_INSTANTIATION class ValueTraits { public: ValueTraits( double t ) { //if ( ( t != t ) || ( t >= 1.0/0.0 ) || ( t == -1.0/0.0 ) ) if ( ( t != t ) || ( t >= HUGE_VAL ) || ( t == -HUGE_VAL ) ) nonFiniteNumber( t ); else if ( requiredDigitsOnLeft( t ) > MAX_DIGITS_ON_LEFT ) hugeNumber( t ); else normalNumber( t ); } const char *asString( void ) const { return _asString; } private: enum { MAX_DIGITS_ON_LEFT = 24, DIGITS_ON_RIGHT = 4, BASE = 10 }; char _asString[1 + MAX_DIGITS_ON_LEFT + 1 + DIGITS_ON_RIGHT + 
1]; static unsigned requiredDigitsOnLeft( double t ); char *doNegative( double &t ); void hugeNumber( double t ); void normalNumber( double t ); void nonFiniteNumber( double t ); char *doubleToString( double t, char *s, unsigned skip = 0, unsigned max = (unsigned)-1 ); }; CXXTEST_COPY_CONST_TRAITS( double ); // // ValueTraits: float // CXXTEST_COPY_TRAITS( const float, const double ); CXXTEST_COPY_CONST_TRAITS( float ); #endif // !CXXTEST_USER_VALUE_TRAITS } #ifdef _CXXTEST_HAVE_STD # include #endif // _CXXTEST_HAVE_STD namespace dummy_enum_ns {} // // CXXTEST_ENUM_TRAITS // #define CXXTEST_ENUM_TRAITS( TYPE, VALUES ) \ namespace CxxTest \ { \ CXXTEST_TEMPLATE_INSTANTIATION \ class ValueTraits \ { \ TYPE _value; \ char _fallback[sizeof("(" #TYPE ")") + 3 * sizeof(TYPE)]; \ public: \ ValueTraits( TYPE value ) { \ _value = value; \ numberToString( _value, copyString( _fallback, "(" #TYPE ")" ) ); \ } \ const char *asString( void ) const \ { \ switch ( _value ) \ { \ VALUES \ default: return _fallback; \ } \ } \ }; \ } using namespace dummy_enum_ns #define CXXTEST_ENUM_MEMBER( MEMBER ) \ case MEMBER: return #MEMBER; #endif // __cxxtest__ValueTraits_h__ ================================================ FILE: cxxtest/cxxtest/Win32Gui.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__Win32Gui_h__ #define __cxxtest__Win32Gui_h__ // // The Win32Gui displays a simple progress bar using the Win32 API. 
// // It accepts the following command line options: // -minimized Start minimized, pop up on error // -keep Don't close the window at the end // -title TITLE Set the window caption // // If both -minimized and -keep are specified, GUI will only keep the // window if it's in focus. // // N.B. If you're wondering why this class doesn't use any standard // library or STL ( would have been nice) it's because it only // uses "straight" Win32 API. // #include #include #include namespace CxxTest { class Win32Gui : public GuiListener { public: void enterGui( int &argc, char **argv ) { parseCommandLine( argc, argv ); } void enterWorld( const WorldDescription &wd ) { getTotalTests( wd ); _testsDone = 0; startGuiThread(); } void guiEnterSuite( const char *suiteName ) { showSuiteName( suiteName ); reset( _suiteStart ); } void guiEnterTest( const char *suiteName, const char *testName ) { ++ _testsDone; setTestCaption( suiteName, testName ); showTestName( testName ); showTestsDone(); progressBarMessage( PBM_STEPIT ); reset( _testStart ); } void yellowBar() { setColor( 255, 255, 0 ); setIcon( IDI_WARNING ); getTotalTests(); } void redBar() { if ( _startMinimized ) showMainWindow( SW_SHOWNORMAL ); setColor( 255, 0, 0 ); setIcon( IDI_ERROR ); getTotalTests(); } void leaveGui() { if ( keep() ) { showSummary(); WaitForSingleObject( _gui, INFINITE ); } DestroyWindow( _mainWindow ); } private: const char *_title; bool _startMinimized, _keep; HANDLE _gui; WNDCLASSEX _windowClass; HWND _mainWindow, _progressBar, _statusBar; HANDLE _canStartTests; unsigned _numTotalTests, _testsDone; char _strTotalTests[WorldDescription::MAX_STRLEN_TOTAL_TESTS]; enum { STATUS_SUITE_NAME, STATUS_SUITE_TIME, STATUS_TEST_NAME, STATUS_TEST_TIME, STATUS_TESTS_DONE, STATUS_WORLD_TIME, STATUS_TOTAL_PARTS }; int _statusWidths[STATUS_TOTAL_PARTS]; unsigned _statusOffsets[STATUS_TOTAL_PARTS]; unsigned _statusTotal; char _statusTestsDone[sizeof("1000000000 of (100%)") + WorldDescription::MAX_STRLEN_TOTAL_TESTS]; 
DWORD _worldStart, _suiteStart, _testStart; char _timeString[sizeof("00:00:00")]; void parseCommandLine( int argc, char **argv ) { _startMinimized = _keep = false; _title = argv[0]; for ( int i = 1; i < argc; ++ i ) { if ( !lstrcmpA( argv[i], "-minimized" ) ) _startMinimized = true; else if ( !lstrcmpA( argv[i], "-keep" ) ) _keep = true; else if ( !lstrcmpA( argv[i], "-title" ) && (i + 1 < argc) ) _title = argv[++i]; } } void getTotalTests() { getTotalTests( tracker().world() ); } void getTotalTests( const WorldDescription &wd ) { _numTotalTests = wd.numTotalTests(); wd.strTotalTests( _strTotalTests ); } void startGuiThread() { _canStartTests = CreateEvent( NULL, TRUE, FALSE, NULL ); DWORD threadId; _gui = CreateThread( NULL, 0, &(Win32Gui::guiThread), (LPVOID)this, 0, &threadId ); WaitForSingleObject( _canStartTests, INFINITE ); } static DWORD WINAPI guiThread( LPVOID parameter ) { ((Win32Gui *)parameter)->gui(); return 0; } void gui() { registerWindowClass(); createMainWindow(); initCommonControls(); createProgressBar(); createStatusBar(); centerMainWindow(); showMainWindow(); startTimer(); startTests(); messageLoop(); } void registerWindowClass() { _windowClass.cbSize = sizeof(_windowClass); _windowClass.style = CS_HREDRAW | CS_VREDRAW; _windowClass.lpfnWndProc = &(Win32Gui::windowProcedure); _windowClass.cbClsExtra = 0; _windowClass.cbWndExtra = sizeof(LONG); _windowClass.hInstance = (HINSTANCE)NULL; _windowClass.hIcon = (HICON)NULL; _windowClass.hCursor = (HCURSOR)NULL; _windowClass.hbrBackground = (HBRUSH)(COLOR_WINDOW + 1); _windowClass.lpszMenuName = NULL; _windowClass.lpszClassName = TEXT("CxxTest Window Class"); _windowClass.hIconSm = (HICON)NULL; RegisterClassEx( &_windowClass ); } void createMainWindow() { _mainWindow = createWindow( _windowClass.lpszClassName, WS_OVERLAPPEDWINDOW ); } void initCommonControls() { HMODULE dll = LoadLibraryA( "comctl32.dll" ); if ( !dll ) return; typedef void (WINAPI *FUNC)( void ); FUNC func = (FUNC)GetProcAddress( dll, 
"InitCommonControls" ); if ( !func ) return; func(); } void createProgressBar() { _progressBar = createWindow( PROGRESS_CLASS, WS_CHILD | WS_VISIBLE | PBS_SMOOTH, _mainWindow ); #ifdef PBM_SETRANGE32 progressBarMessage( PBM_SETRANGE32, 0, _numTotalTests ); #else // No PBM_SETRANGE32, use PBM_SETRANGE progressBarMessage( PBM_SETRANGE, 0, MAKELPARAM( 0, (WORD)_numTotalTests ) ); #endif // PBM_SETRANGE32 progressBarMessage( PBM_SETPOS, 0 ); progressBarMessage( PBM_SETSTEP, 1 ); greenBar(); UpdateWindow( _progressBar ); } void createStatusBar() { _statusBar = createWindow( STATUSCLASSNAME, WS_CHILD | WS_VISIBLE, _mainWindow ); setRatios( 4, 1, 3, 1, 3, 1 ); } void setRatios( unsigned suiteNameRatio, unsigned suiteTimeRatio, unsigned testNameRatio, unsigned testTimeRatio, unsigned testsDoneRatio, unsigned worldTimeRatio ) { _statusTotal = 0; _statusOffsets[STATUS_SUITE_NAME] = (_statusTotal += suiteNameRatio); _statusOffsets[STATUS_SUITE_TIME] = (_statusTotal += suiteTimeRatio); _statusOffsets[STATUS_TEST_NAME] = (_statusTotal += testNameRatio); _statusOffsets[STATUS_TEST_TIME] = (_statusTotal += testTimeRatio); _statusOffsets[STATUS_TESTS_DONE] = (_statusTotal += testsDoneRatio); _statusOffsets[STATUS_WORLD_TIME] = (_statusTotal += worldTimeRatio); } HWND createWindow( LPCTSTR className, DWORD style, HWND parent = (HWND)NULL ) { return CreateWindow( className, NULL, style, 0, 0, 0, 0, parent, (HMENU)NULL, (HINSTANCE)NULL, (LPVOID)this ); } void progressBarMessage( UINT message, WPARAM wParam = 0, LPARAM lParam = 0 ) { SendMessage( _progressBar, message, wParam, lParam ); } void centerMainWindow() { RECT screen; getScreenArea( screen ); LONG screenWidth = screen.right - screen.left; LONG screenHeight = screen.bottom - screen.top; LONG xCenter = (screen.right + screen.left) / 2; LONG yCenter = (screen.bottom + screen.top) / 2; LONG windowWidth = (screenWidth * 4) / 5; LONG windowHeight = screenHeight / 10; LONG minimumHeight = 2 * (GetSystemMetrics( SM_CYCAPTION ) + 
GetSystemMetrics( SM_CYFRAME )); if ( windowHeight < minimumHeight ) windowHeight = minimumHeight; SetWindowPos( _mainWindow, HWND_TOP, xCenter - (windowWidth / 2), yCenter - (windowHeight / 2), windowWidth, windowHeight, 0 ); } void getScreenArea( RECT &area ) { if ( !getScreenAreaWithoutTaskbar( area ) ) getWholeScreenArea( area ); } bool getScreenAreaWithoutTaskbar( RECT &area ) { return (SystemParametersInfo( SPI_GETWORKAREA, sizeof(RECT), &area, 0 ) != 0); } void getWholeScreenArea( RECT &area ) { area.left = area.top = 0; area.right = GetSystemMetrics( SM_CXSCREEN ); area.bottom = GetSystemMetrics( SM_CYSCREEN ); } void showMainWindow() { showMainWindow( _startMinimized ? SW_MINIMIZE : SW_SHOWNORMAL ); UpdateWindow( _mainWindow ); } void showMainWindow( int mode ) { ShowWindow( _mainWindow, mode ); } enum { TIMER_ID = 1, TIMER_DELAY = 1000 }; void startTimer() { reset( _worldStart ); reset( _suiteStart ); reset( _testStart ); SetTimer( _mainWindow, TIMER_ID, TIMER_DELAY, 0 ); } void reset( DWORD &tick ) { tick = GetTickCount(); } void startTests() { SetEvent( _canStartTests ); } void messageLoop() { MSG message; while ( BOOL haveMessage = GetMessage( &message, NULL, 0, 0 ) ) if ( haveMessage != -1 ) DispatchMessage( &message ); } static LRESULT CALLBACK windowProcedure( HWND window, UINT message, WPARAM wParam, LPARAM lParam ) { if ( message == WM_CREATE ) setUp( window, (LPCREATESTRUCT)lParam ); Win32Gui *that = (Win32Gui *)GetWindowLong( window, GWL_USERDATA ); return that->handle( window, message, wParam, lParam ); } static void setUp( HWND window, LPCREATESTRUCT create ) { SetWindowLong( window, GWL_USERDATA, (LONG)create->lpCreateParams ); } LRESULT handle( HWND window, UINT message, WPARAM wParam, LPARAM lParam ) { switch ( message ) { case WM_SIZE: resizeControls(); break; case WM_TIMER: updateTime(); break; case WM_CLOSE: case WM_DESTROY: case WM_QUIT: ExitProcess( tracker().failedTests() ); default: return DefWindowProc( window, message, wParam, 
lParam ); } return 0; } void resizeControls() { RECT r; GetClientRect( _mainWindow, &r ); LONG width = r.right - r.left; LONG height = r.bottom - r.top; GetClientRect( _statusBar, &r ); LONG statusHeight = r.bottom - r.top; LONG resizeGripWidth = statusHeight; LONG progressHeight = height - statusHeight; SetWindowPos( _progressBar, HWND_TOP, 0, 0, width, progressHeight, 0 ); SetWindowPos( _statusBar, HWND_TOP, 0, progressHeight, width, statusHeight, 0 ); setStatusParts( width - resizeGripWidth ); } void setStatusParts( LONG width ) { for ( unsigned i = 0; i < STATUS_TOTAL_PARTS; ++ i ) _statusWidths[i] = (width * _statusOffsets[i]) / _statusTotal; statusBarMessage( SB_SETPARTS, STATUS_TOTAL_PARTS, _statusWidths ); } void statusBarMessage( UINT message, WPARAM wParam = 0, const void *lParam = 0 ) { SendMessage( _statusBar, message, wParam, (LPARAM)lParam ); } void greenBar() { setColor( 0, 255, 0 ); setIcon( IDI_INFORMATION ); } #ifdef PBM_SETBARCOLOR void setColor( BYTE red, BYTE green, BYTE blue ) { progressBarMessage( PBM_SETBARCOLOR, 0, RGB( red, green, blue ) ); } #else // !PBM_SETBARCOLOR void setColor( BYTE, BYTE, BYTE ) { } #endif // PBM_SETBARCOLOR void setIcon( LPCTSTR icon ) { SendMessage( _mainWindow, WM_SETICON, ICON_BIG, (LPARAM)loadStandardIcon( icon ) ); } HICON loadStandardIcon( LPCTSTR icon ) { return LoadIcon( (HINSTANCE)NULL, icon ); } void setTestCaption( const char *suiteName, const char *testName ) { setCaption( suiteName, "::", testName, "()" ); } void setCaption( const char *a = "", const char *b = "", const char *c = "", const char *d = "" ) { unsigned length = lstrlenA( _title ) + sizeof( " - " ) + lstrlenA( a ) + lstrlenA( b ) + lstrlenA( c ) + lstrlenA( d ); char *name = allocate( length ); lstrcpyA( name, _title ); lstrcatA( name, " - " ); lstrcatA( name, a ); lstrcatA( name, b ); lstrcatA( name, c ); lstrcatA( name, d ); SetWindowTextA( _mainWindow, name ); deallocate( name ); } void showSuiteName( const char *suiteName ) { 
setStatusPart( STATUS_SUITE_NAME, suiteName ); } void showTestName( const char *testName ) { setStatusPart( STATUS_TEST_NAME, testName ); } void showTestsDone() { wsprintfA( _statusTestsDone, "%u of %s (%u%%)", _testsDone, _strTotalTests, (_testsDone * 100) / _numTotalTests ); setStatusPart( STATUS_TESTS_DONE, _statusTestsDone ); } void updateTime() { setStatusTime( STATUS_WORLD_TIME, _worldStart ); setStatusTime( STATUS_SUITE_TIME, _suiteStart ); setStatusTime( STATUS_TEST_TIME, _testStart ); } void setStatusTime( unsigned part, DWORD start ) { unsigned total = (GetTickCount() - start) / 1000; unsigned hours = total / 3600; unsigned minutes = (total / 60) % 60; unsigned seconds = total % 60; if ( hours ) wsprintfA( _timeString, "%u:%02u:%02u", hours, minutes, seconds ); else wsprintfA( _timeString, "%02u:%02u", minutes, seconds ); setStatusPart( part, _timeString ); } bool keep() { if ( !_keep ) return false; if ( !_startMinimized ) return true; return (_mainWindow == GetForegroundWindow()); } void showSummary() { stopTimer(); setSummaryStatusBar(); setSummaryCaption(); } void setStatusPart( unsigned part, const char *text ) { statusBarMessage( SB_SETTEXTA, part, text ); } void stopTimer() { KillTimer( _mainWindow, TIMER_ID ); setStatusTime( STATUS_WORLD_TIME, _worldStart ); } void setSummaryStatusBar() { setRatios( 0, 0, 0, 0, 1, 1 ); resizeControls(); const char *tests = (_numTotalTests == 1) ? 
"test" : "tests"; if ( tracker().failedTests() ) wsprintfA( _statusTestsDone, "Failed %u of %s %s", tracker().failedTests(), _strTotalTests, tests ); else wsprintfA( _statusTestsDone, "%s %s passed", _strTotalTests, tests ); setStatusPart( STATUS_TESTS_DONE, _statusTestsDone ); } void setSummaryCaption() { setCaption( _statusTestsDone ); } char *allocate( unsigned length ) { return (char *)HeapAlloc( GetProcessHeap(), 0, length ); } void deallocate( char *data ) { HeapFree( GetProcessHeap(), 0, data ); } }; } #endif // __cxxtest__Win32Gui_h__ ================================================ FILE: cxxtest/cxxtest/X11Gui.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
------------------------------------------------------------------------- */

#ifndef __cxxtest__X11Gui_h__
#define __cxxtest__X11Gui_h__

//
// X11Gui displays a simple progress bar using X11
//
// It accepts the following command-line arguments:
//  -title <title>              - Sets the application title
//  -fn or -font <font>         - Sets the font
//  -bg or -background <color>  - Sets the background color (default=Grey)
//  -fg or -foreground <color>  - Sets the text color (default=Black)
//  -green/-yellow/-red <color> - Sets the colors of the bar
//

#include <cxxtest/Gui.h>

#include <X11/Xlib.h>
#include <X11/Xutil.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

namespace CxxTest
{
class X11Gui : public GuiListener
{
public:
    void enterGui( int &argc, char **argv )
    {
        parseCommandLine( argc, argv );
    }

    // Everything is guarded on _display so the listener degrades to a
    // no-op when no X server is reachable.
    void enterWorld( const WorldDescription &wd )
    {
        openDisplay();
        if ( _display )
        {
            createColors();
            createWindow();
            createGc();
            createFont();
            centerWindow();
            initializeEvents();
            initializeBar( wd );
            processEvents();
        }
    }

    void guiEnterTest( const char *suiteName, const char *testName )
    {
        if ( _display )
        {
            ++ _testsDone;
            setWindowName( suiteName, testName );
            redraw();
        }
    }

    void yellowBar()
    {
        if ( _display )
        {
            _barColor = getColor( _yellowName );
            getTotalTests();
            processEvents();
        }
    }

    void redBar()
    {
        if ( _display )
        {
            _barColor = getColor( _redName );
            getTotalTests();
            processEvents();
        }
    }

    void leaveGui()
    {
        if ( _display )
        {
            freeFontInfo();
            destroyGc();
            destroyWindow();
            closeDisplay();
        }
    }

private:
    const char *_programName;            // window title base (argv[0] or -title)
    Display *_display;                   // NULL when the X server is unavailable
    Window _window;
    unsigned _numTotalTests, _testsDone;
    char _strTotalTests[WorldDescription::MAX_STRLEN_TOTAL_TESTS];
    const char *_foregroundName, *_backgroundName;
    const char *_greenName, *_yellowName, *_redName;
    unsigned long _foreground, _background, _barColor;
    int _width, _height;                 // current window geometry
    GC _gc;
    const char *_fontName;               // NULL => use server default font
    XID _fontId;
    XFontStruct *_fontInfo;
    int _textHeight, _textDescent;
    long _eventMask;
    Colormap _colormap;

    // Note: options that take a value are only recognized while a following
    // argument exists (loop condition i + 1 < argc).
    void parseCommandLine( int &argc, char **argv )
    {
        _programName = argv[0];
        _fontName = 0;
        _foregroundName = "Black";
        _backgroundName = "Grey";
        _greenName = "Green";
        _yellowName = "Yellow";
        _redName = "Red";

        for ( int i = 1; i + 1 < argc; ++ i )
        {
            if ( !strcmp( argv[i], "-title" ) )
                _programName = argv[++ i];
            else if ( !strcmp( argv[i], "-fn" ) || !strcmp( argv[i], "-font" ) )
                _fontName = argv[++ i];
            else if ( !strcmp( argv[i], "-fg" ) || !strcmp( argv[i], "-foreground" ) )
                _foregroundName = argv[++ i];
            else if ( !strcmp( argv[i], "-bg" ) || !strcmp( argv[i], "-background" ) )
                _backgroundName = argv[++ i];
            else if ( !strcmp( argv[i], "-green" ) )
                _greenName = argv[++ i];
            else if ( !strcmp( argv[i], "-yellow" ) )
                _yellowName = argv[++ i];
            else if ( !strcmp( argv[i], "-red" ) )
                _redName = argv[++ i];
        }
    }

    void openDisplay()
    {
        _display = XOpenDisplay( NULL );
    }

    void createColors()
    {
        _colormap = DefaultColormap( _display, 0 );
        _foreground = getColor( _foregroundName );
        _background = getColor( _backgroundName );
    }

    // Resolve a color name to a pixel value in the default colormap.
    unsigned long getColor( const char *colorName )
    {
        XColor color;
        XParseColor( _display, _colormap, colorName, &color );
        XAllocColor( _display, _colormap, &color );
        return color.pixel;
    }

    void createWindow()
    {
        // Created 1x1; real geometry is set later by centerWindow().
        _window = XCreateSimpleWindow( _display, RootWindow( _display, 0 ), 0, 0, 1, 1, 0, 0, _background );
    }

    void createGc()
    {
        _gc = XCreateGC( _display, _window, 0, 0 );
    }

    void createFont()
    {
        if ( !loadFont() )
            useDefaultFont();
        getFontInfo();
        _textHeight = _fontInfo->ascent + _fontInfo->descent;
        _textDescent = _fontInfo->descent;
    }

    bool loadFont()
    {
        if ( !_fontName )
            return false;
        _fontId = XLoadFont( _display, _fontName );
        return (XSetFont( _display, _gc, _fontId ) == Success);
    }

    void useDefaultFont()
    {
        _fontId = XGContextFromGC( _gc );
    }

    void getFontInfo()
    {
        _fontInfo = XQueryFont( _display, _fontId );
    }

    void freeFontInfo()
    {
        XFreeFontInfo( NULL, _fontInfo, 1 );
    }

    // Only expose events are needed; the bar is redrawn on demand.
    void initializeEvents()
    {
        _eventMask = ExposureMask;
        XSelectInput( _display, _window, _eventMask );
    }

    void initializeBar( const WorldDescription &wd )
    {
        getTotalTests( wd );
        _testsDone = 0;
        _barColor = getColor( _greenName );
    }

    void getTotalTests()
    {
        getTotalTests( tracker().world() );
    }

    void getTotalTests( const WorldDescription &wd )
    {
        _numTotalTests = wd.numTotalTests();
        wd.strTotalTests( _strTotalTests );
    }

    void centerWindow()
    {
        XMapWindow( _display, _window );

        Screen *screen = XDefaultScreenOfDisplay( _display );
        int screenWidth = WidthOfScreen( screen );
        int screenHeight = HeightOfScreen( screen );
        int xCenter = screenWidth / 2;
        int yCenter = screenHeight / 2;

        _width = (screenWidth * 4) / 5;
        _height = screenHeight / 14;

        XMoveResizeWindow( _display, _window,
                           xCenter - (_width / 2), yCenter - (_height / 2), _width, _height );
    }

    // Drain pending expose events, redrawing after each; non-blocking.
    void processEvents()
    {
        redraw();

        XEvent event;
        while( XCheckMaskEvent( _display, _eventMask, &event ) )
            redraw();
    }

    void setWindowName( const char *suiteName, const char *testName )
    {
        // sizeof( " - ::()" ) also covers the terminating NUL.
        unsigned length = strlen( _programName ) + strlen( suiteName ) + strlen( testName ) + sizeof( " - ::()" );
        char *name = (char *)malloc( length );
        sprintf( name, "%s - %s::%s()", _programName, suiteName, testName );
        XSetStandardProperties( _display, _window, name, 0, 0, 0, 0, 0 );
        free( name );
    }

    void redraw()
    {
        getWindowSize();
        drawSolidBar();
        drawDividers();
        drawPercentage();
        flush();
    }

    void getWindowSize()
    {
        XWindowAttributes attributes;
        XGetWindowAttributes( _display, _window, &attributes );
        _width = attributes.width;
        _height = attributes.height;
    }

    void drawSolidBar()
    {
        unsigned barWidth = (_width * _testsDone) / _numTotalTests;

        XSetForeground( _display, _gc, _barColor );
        XFillRectangle( _display, _window, _gc, 0, 0, barWidth, _height );

        XSetForeground( _display, _gc, _background );
        XFillRectangle( _display, _window, _gc, barWidth, 0, _width + 1 - barWidth, _height );
    }

    // Skip the per-test tick marks when they would be less than 5px apart.
    void drawDividers()
    {
        if(_width / _numTotalTests < 5)
            return;
        for ( unsigned i = 1; i < _testsDone; ++ i )
        {
            int x = (_width * i) / _numTotalTests;
            XDrawLine( _display, _window, _gc, x, 0, x, _height);
        }
    }

    void drawPercentage()
    {
        XSetForeground( _display, _gc, _foreground );

        char str[sizeof("1000000000 of ") + sizeof(_strTotalTests) + sizeof(" (100%)")];
        sprintf( str, "%u of %s (%u%%)", _testsDone, _strTotalTests, (_testsDone * 100) / _numTotalTests );
        unsigned len = strlen( str );

        int textWidth = XTextWidth( _fontInfo, str, len );

        // Center the text horizontally and vertically in the bar.
        XDrawString( _display, _window, _gc,
                     (_width - textWidth) / 2, ((_height + _textHeight) / 2) - _textDescent,
                     str, len );
    }

    void flush()
    {
        XFlush( _display );
    }

    void destroyGc()
    {
        XFreeGC( _display, _gc );
    }

    void destroyWindow()
    {
        XDestroyWindow( _display, _window );
    }

    void closeDisplay()
    {
        XCloseDisplay( _display );
    }
};
}

#endif //__cxxtest__X11Gui_h__


================================================
FILE: cxxtest/cxxtest/XUnitPrinter.h
================================================
/* -------------------------------------------------------------------------
   CxxTest: A lightweight C++ unit testing library.
   Copyright (c) 2008 Sandia Corporation.
   This software is distributed under the LGPL License v2.1
   For more information, see the COPYING file in the top CxxTest directory.
   Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
   the U.S. Government retains certain rights in this software.
------------------------------------------------------------------------- */

#ifndef __CXXTEST__XUNIT_PRINTER_H
#define __CXXTEST__XUNIT_PRINTER_H

//
// XUnitPrinter combines an ErrorPrinter with an XML formatter.
// #include <cxxtest/TeeListener.h> #include <cxxtest/ErrorPrinter.h> #include <cxxtest/XmlPrinter.h> namespace CxxTest { class XUnitPrinter : public TeeListener { public: XmlPrinter xml_printer; ErrorPrinter error_printer; XUnitPrinter( CXXTEST_STD(ostream) &o = CXXTEST_STD(cout) ) : xml_printer(o) { setFirst( error_printer ); setSecond( xml_printer ); } int run() { TestRunner::runAllTests( *this ); return tracker().failedTests(); } }; } #endif //__CXXTEST__XUNIT_PRINTER_H ================================================ FILE: cxxtest/cxxtest/XmlFormatter.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ // Licensed under the LGPL, see http://www.gnu.org/licenses/lgpl.html #ifndef __CXXTEST__XMLFORMATTER_H #define __CXXTEST__XMLFORMATTER_H // // The XmlFormatter is a TestListener that // prints reports of the errors to an output // stream in the form of an XML document. // // The following definitions are used if stack trace support is enabled, // to give the traces an easily-parsable XML format. If stack tracing is // not enabled, then these definitions will be ignored. 
#define CXXTEST_STACK_TRACE_ESCAPE_AS_XML #define CXXTEST_STACK_TRACE_NO_ESCAPE_FILELINE_AFFIXES #define CXXTEST_STACK_TRACE_INITIAL_PREFIX "<stack-frame function=\"" #define CXXTEST_STACK_TRACE_INITIAL_SUFFIX "\"/>\n" #define CXXTEST_STACK_TRACE_OTHER_PREFIX CXXTEST_STACK_TRACE_INITIAL_PREFIX #define CXXTEST_STACK_TRACE_OTHER_SUFFIX CXXTEST_STACK_TRACE_INITIAL_SUFFIX #define CXXTEST_STACK_TRACE_ELLIDED_MESSAGE "" #define CXXTEST_STACK_TRACE_FILELINE_PREFIX "\" location=\"" #define CXXTEST_STACK_TRACE_FILELINE_SUFFIX "" #include <cxxtest/TestRunner.h> #include <cxxtest/TestListener.h> #include <cxxtest/TestTracker.h> #include <cxxtest/ValueTraits.h> #include <cxxtest/ErrorFormatter.h> #include <cxxtest/StdHeaders.h> #include <iostream> #include <sstream> #include <cstring> #include <cstdio> namespace CxxTest { class TeeOutputStreams { private: class teebuffer : public std::basic_streambuf<char> { typedef std::basic_streambuf<char> streambuf_t; public: teebuffer(streambuf_t * buf1, streambuf_t * buf2) : buffer1(buf1), buffer2(buf2) {} virtual int overflow(int c) { if (c == EOF) return !EOF; else { int const ans1 = buffer1->sputc(c); int const ans2 = buffer2->sputc(c); return ans1 == EOF || ans2 == EOF ? EOF : c; } } virtual int sync() { int ans1 = buffer1->pubsync(); int ans2 = buffer2->pubsync(); return ans1 || ans2 ? 
-1 : 0; } streambuf_t * buffer1; streambuf_t * buffer2; }; public: TeeOutputStreams(std::ostream& _cout, std::ostream& _cerr) : out(), err(), orig_cout(_cout), orig_cerr(_cerr), tee_out(out.rdbuf(), _cout.rdbuf()), tee_err(err.rdbuf(), _cerr.rdbuf()) { orig_cout.rdbuf(&tee_out); orig_cerr.rdbuf(&tee_err); } ~TeeOutputStreams() { orig_cout.rdbuf(tee_out.buffer2); orig_cerr.rdbuf(tee_err.buffer2); } std::stringstream out; std::stringstream err; private: std::ostream& orig_cout; std::ostream& orig_cerr; teebuffer tee_out; teebuffer tee_err; }; class ElementInfo { public: std::string name; std::stringstream value; std::map<std::string,std::string> attribute; ElementInfo() : name(), value(), attribute() {} ElementInfo(const ElementInfo& rhs) : name(rhs.name), value(rhs.value.str()), attribute(rhs.attribute) {} ElementInfo& operator=(const ElementInfo& rhs) { name = rhs.name; value.str(rhs.value.str()); attribute = rhs.attribute; return *this; } template <class Type> void add(const std::string& name_, Type& value_) { std::ostringstream os; os << value_; attribute[name_] = os.str(); } void write(OutputStream& os) { os << " <" << name.c_str() << " "; std::map<std::string,std::string>::iterator curr=attribute.begin(); std::map<std::string,std::string>::iterator end =attribute.end(); while (curr != end) { os << curr->first.c_str() << "=\"" << curr->second.c_str() << "\" "; curr++; } if (value.str().empty()) { os << "/>"; } else { os << ">" << escape(value.str()).c_str() << "</" << name.c_str() << ">"; } os.endl(os); } std::string escape(const std::string& str) { std::string escStr = ""; for(size_t i = 0; i < str.length(); i++) { switch(str[i]) { case '"': escStr += """; break; case '\'': escStr += "'"; break; case '<': escStr += "<"; break; case '>': escStr += ">"; break; case '&': escStr += "&"; break; default: escStr += str[i]; break; } } return escStr; } }; class TestCaseInfo { public: TestCaseInfo() : fail(false), error(false), runtime(0.0) {} std::string className; 
std::string testName; std::string line; bool fail; bool error; double runtime; std::list<ElementInfo> elements; typedef std::list<ElementInfo>::iterator element_t; std::string world; element_t add_element(const std::string& name) { element_t elt = elements.insert(elements.end(), ElementInfo()); elt->name=name; return elt; } element_t update_element(const std::string& name) { element_t elt = elements.begin(); while ( elt != elements.end() ) { if ( elt->name == name ) return elt; } return add_element(name); } void write( OutputStream &o ) { o << " <testcase classname=\"" << className.c_str() << "\" name=\"" << testName.c_str() << "\" line=\"" << line.c_str() << "\""; bool elts=false; element_t curr = elements.begin(); element_t end = elements.end(); while (curr != end) { if (!elts) { o << ">"; o.endl(o); elts=true; } curr->write(o); curr++; } if (elts) o << " </testcase>"; else o << " />"; o.endl(o); } }; class XmlFormatter : public TestListener { public: XmlFormatter( OutputStream *o, OutputStream *ostr, std::ostringstream *os) : _o(o), _ostr(ostr), _os(os), stream_redirect(NULL) {} std::list<TestCaseInfo> info; std::list<TestCaseInfo>::iterator testcase; typedef std::list<ElementInfo>::iterator element_t; std::string classname; int ntests; int nfail; int nerror; double totaltime; int run() { TestRunner::runAllTests( *this ); return tracker().failedTests(); } void enterWorld( const WorldDescription & /*desc*/ ) { ntests=0; nfail=0; nerror=0; totaltime=0; } static void totalTests( OutputStream &o ) { char s[WorldDescription::MAX_STRLEN_TOTAL_TESTS]; const WorldDescription &wd = tracker().world(); o << wd.strTotalTests( s ) << (wd.numTotalTests() == 1 ? " test" : " tests"); } void enterSuite( const SuiteDescription& desc ) { classname = desc.suiteName(); // replace "::" namespace with java-style "." size_t pos = 0; while( (pos = classname.find("::", pos)) != CXXTEST_STD(string::npos) ) classname.replace(pos, 2, "."); while ( ! classname.empty() && classname[0] == '.' 
) classname.erase(0,1); //CXXTEST_STD(cout) << "HERE " << desc.file() << " " // << classname << CXXTEST_STD(endl); //classname=desc.suiteName(); //(*_o) << "file=\"" << desc.file() << "\" "; //(*_o) << "line=\"" << desc.line() << "\""; //_o->flush(); } void leaveSuite( const SuiteDescription & ) { std::list<TestCaseInfo>::iterator curr = info.begin(); std::list<TestCaseInfo>::iterator end = info.end(); while (curr != end) { if (curr->fail) nfail++; if (curr->error) nerror++; totaltime += curr->runtime; ntests++; curr++; } curr = info.begin(); end = info.end(); while (curr != end) { (*curr).write(*_ostr); curr++; } info.clear(); } void enterTest( const TestDescription & desc ) { testcase = info.insert(info.end(),TestCaseInfo()); testcase->testName = desc.testName(); testcase->className = classname; std::ostringstream os; os << desc.line(); testcase->line = os.str(); if ( stream_redirect ) CXXTEST_STD(cerr) << "ERROR: The stream_redirect != NULL" << CXXTEST_STD(endl); stream_redirect = new TeeOutputStreams(CXXTEST_STD(cout), CXXTEST_STD(cerr)); } void leaveTest( const TestDescription & ) { if ( stream_redirect != NULL ) { std::string out = stream_redirect->out.str(); if ( ! out.empty() ) { // silently ignore the '.' if ( out[0] != '.' || out.size() > 1 ) testcase->add_element("system-out")->value << out; } if ( ! 
stream_redirect->err.str().empty() ) testcase->add_element("system-err")->value << stream_redirect->err.str(); delete stream_redirect; stream_redirect = NULL; } } void leaveWorld( const WorldDescription& desc ) { std::ostringstream os; os << totaltime; (*_o) << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" << endl; (*_o) << "<testsuite name=\"" << desc.worldName() << "\" "; (*_o) << " tests=\"" << ntests << "\" errors=\"" << nerror << "\" failures=\"" << nfail << "\" time=\"" << os.str().c_str() << "\" >"; _o->endl(*_o); (*_o) << _os->str().c_str(); _os->clear(); (*_o) << "</testsuite>" << endl; _o->flush(); } void trace( const char* /*file*/, int line, const char *expression ) { element_t elt = testcase->add_element("trace"); elt->add("line",line); elt->value << expression; } void warning( const char* /*file*/, int line, const char *expression ) { element_t elt = testcase->add_element("warning"); elt->add("line",line); elt->value << expression; } void failedTest( const char* file, int line, const char* expression ) { testFailure( file, line, "failure") << "Test failed: " << expression; } void failedAssert( const char *file, int line, const char *expression ) { testFailure( file, line, "failedAssert" ) << "Assertion failed: " << expression; } void failedAssertEquals( const char *file, int line, const char* xStr, const char* yStr, const char *x, const char *y ) { testFailure( file, line, "failedAssertEquals" ) << "Error: Expected (" << xStr << " == " << yStr << "), found (" << x << " != " << y << ")"; } void failedAssertSameData( const char *file, int line, const char *xStr, const char *yStr, const char *sizeStr, const void* /*x*/, const void* /*y*/, unsigned size ) { testFailure( file, line, "failedAssertSameData") << "Error: Expected " << sizeStr << " (" << size << ") bytes to be equal at (" << xStr << ") and (" << yStr << "), found"; } void failedAssertSameFiles( const char *file, int line, const char *, const char *, const char* explanation ) { testFailure( 
file, line, "failedAssertSameFiles" ) << "Error: " << explanation; } void failedAssertDelta( const char *file, int line, const char *xStr, const char *yStr, const char *dStr, const char *x, const char *y, const char *d ) { testFailure( file, line, "failedAssertDelta" ) << "Error: Expected (" << xStr << " == " << yStr << ") up to " << dStr << " (" << d << "), found (" << x << " != " << y << ")"; } void failedAssertDiffers( const char *file, int line, const char *xStr, const char *yStr, const char *value ) { testFailure( file, line, "failedAssertDiffers" ) << "Error: Expected (" << xStr << " != " << yStr << "), found (" << value << ")"; } void failedAssertLessThan( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ) { testFailure( file, line, "failedAssertLessThan" ) << "Error: Expected (" << xStr << " < " << yStr << "), found (" << x << " >= " << y << ")"; } void failedAssertLessThanEquals( const char *file, int line, const char *xStr, const char *yStr, const char *x, const char *y ) { testFailure( file, line, "failedAssertLessThanEquals" ) << "Error: Expected (" << xStr << " <= " << yStr << "), found (" << x << " > " << y << ")"; } void failedAssertRelation( const char *file, int line, const char *relation, const char *xStr, const char *yStr, const char *x, const char *y ) { testFailure( file, line, "failedAssertRelation" ) << "Error: Expected " << relation << "( " << xStr << ", " << yStr << " ), found !" << relation << "( " << x << ", " << y << " )"; } void failedAssertPredicate( const char *file, int line, const char *predicate, const char *xStr, const char *x ) { testFailure( file, line, "failedAssertPredicate" ) << "Error: Expected " << predicate << "( " << xStr << " ), found !" 
<< predicate << "( " << x << " )"; } void failedAssertThrows( const char *file, int line, const char *expression, const char *type, bool otherThrown ) { testFailure( file, line, "failedAssertThrows" ) << "Error: Expected (" << expression << ") to throw (" << type << ") but it " << (otherThrown ? "threw something else" : "didn't throw"); } void failedAssertThrowsNot( const char *file, int line, const char *expression ) { testFailure( file, line, "failedAssertThrowsNot" ) << "Error: Expected (" << expression << ") not to throw, but it did"; } protected: OutputStream *outputStream() const { return _o; } OutputStream *outputFileStream() const { return _ostr; } private: XmlFormatter( const XmlFormatter & ); XmlFormatter &operator=( const XmlFormatter & ); std::stringstream& testFailure( const char* file, int line, const char *failureType) { testcase->fail=true; element_t elt = testcase->update_element("failure"); if ( elt->value.str().empty() ) { elt->add("type",failureType); elt->add("line",line); elt->add("file",file); } else elt->value << CXXTEST_STD(endl); return elt->value; //failedTest(file,line,message.c_str()); } #if 0 void attributeBinary( const char* name, const void *value, unsigned size ) { (*_o) << name; (*_o) << "=\""; dump(value, size); (*_o) << "\" "; } void dump( const void *buffer, unsigned size ) { if (!buffer) return; unsigned dumpSize = size; if ( maxDumpSize() && dumpSize > maxDumpSize() ) dumpSize = maxDumpSize(); const unsigned char *p = (const unsigned char *)buffer; for ( unsigned i = 0; i < dumpSize; ++ i ) (*_o) << byteToHex( *p++ ) << " "; if ( dumpSize < size ) (*_o) << "... 
"; } #endif static void endl( OutputStream &o ) { OutputStream::endl( o ); } OutputStream *_o; OutputStream *_ostr; std::ostringstream *_os; TeeOutputStreams *stream_redirect; }; } #endif // __CXXTEST__XMLFORMATTER_H ================================================ FILE: cxxtest/cxxtest/XmlPrinter.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__XmlPrinter_h__ #define __cxxtest__XmlPrinter_h__ // // The XmlPrinter is a simple TestListener that // prints JUnit style xml to the output stream // #include <cxxtest/Flags.h> #ifndef _CXXTEST_HAVE_STD # define _CXXTEST_HAVE_STD #endif // _CXXTEST_HAVE_STD #include <cxxtest/XmlFormatter.h> #include <cxxtest/StdValueTraits.h> #include <sstream> #ifdef _CXXTEST_OLD_STD # include <iostream.h> #else // !_CXXTEST_OLD_STD # include <iostream> #endif // _CXXTEST_OLD_STD namespace CxxTest { class XmlPrinter : public XmlFormatter { public: XmlPrinter( CXXTEST_STD(ostream) &o = CXXTEST_STD(cout), const char* /*preLine*/ = ":", const char* /*postLine*/ = "" ) : XmlFormatter( new Adapter(o), new Adapter(ostr), &ostr ) {} virtual ~XmlPrinter() { delete outputStream(); delete outputFileStream(); } private: std::ostringstream ostr; class Adapter : public OutputStream { CXXTEST_STD(ostream) &_o; public: Adapter( CXXTEST_STD(ostream) &o ) : _o(o) {} void flush() { _o.flush(); } OutputStream &operator<<( const char *s ) { _o << s; return *this; } OutputStream &operator<<( Manipulator m ) { return OutputStream::operator<<( m 
); } OutputStream &operator<<( unsigned i ) { char s[1 + 3 * sizeof(unsigned)]; numberToString( i, s ); _o << s; return *this; } }; }; } #endif // __cxxtest__XmlPrinter_h__ ================================================ FILE: cxxtest/cxxtest/YesNoRunner.h ================================================ /* ------------------------------------------------------------------------- CxxTest: A lightweight C++ unit testing library. Copyright (c) 2008 Sandia Corporation. This software is distributed under the LGPL License v2.1 For more information, see the COPYING file in the top CxxTest directory. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. ------------------------------------------------------------------------- */ #ifndef __cxxtest__YesNoRunner_h__ #define __cxxtest__YesNoRunner_h__ // // The YesNoRunner is a simple TestListener that // just returns true iff all tests passed. // #include <cxxtest/TestRunner.h> #include <cxxtest/TestListener.h> namespace CxxTest { class YesNoRunner : public TestListener { public: YesNoRunner() { } int run() { TestRunner::runAllTests( *this ); return tracker().failedTests(); } }; } #endif // __cxxtest__YesNoRunner_h__ ================================================ FILE: cxxtest/cxxtest/__init__.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- """cxxtest: A Python package that supports the CxxTest test framework for C/C++. .. 
_CxxTest: http://cxxtest.tigris.org/ CxxTest is a unit testing framework for C++ that is similar in spirit to JUnit, CppUnit, and xUnit. CxxTest is easy to use because it does not require precompiling a CxxTest testing library, it employs no advanced features of C++ (e.g. RTTI) and it supports a very flexible form of test discovery. The cxxtest Python package includes capabilities for parsing C/C++ source files and generating CxxTest drivers. """ from cxxtest.__release__ import __version__, __date__ __date__ __version__ __maintainer__ = "William E. Hart" __maintainer_email__ = "whart222@gmail.com" __license__ = "LGPL" __url__ = "http://cxxtest.tigris.org/" from cxxtest.cxxtestgen import * ================================================ FILE: cxxtest/cxxtest/__release__.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- """ Release Information for cxxtest """ __version__ = '4.0.2' __date__ = "2012-01-02" ================================================ FILE: cxxtest/cxxtest/cxx_parser.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. 
Government retains certain rights in this software. #------------------------------------------------------------------------- # vim: fileencoding=utf-8 # # This is a PLY parser for the entire ANSI C++ grammar. This grammar was # adapted from the FOG grammar developed by E. D. Willink. See # # http://www.computing.surrey.ac.uk/research/dsrg/fog/ # # for further details. # # The goal of this grammar is to extract information about class, function and # class method declarations, along with their associated scope. Thus, this # grammar can be used to analyze classes in an inheritance heirarchy, and then # enumerate the methods in a derived class. # # This grammar parses blocks of <>, (), [] and {} in a generic manner. Thus, # There are several capabilities that this grammar does not support: # # 1. Ambiguous template specification. This grammar cannot parse template # specifications that do not have paired <>'s in their declaration. In # particular, ambiguous declarations like # # foo<A, c<3 >(); # # cannot be correctly parsed. # # 2. Template class specialization. Although the goal of this grammar is to # extract class information, specialization of templated classes is # not supported. When a template class definition is parsed, it's # declaration is archived without information about the template # parameters. Class specializations will be stored separately, and # thus they can be processed after the fact. However, this grammar # does not attempt to correctly process properties of class inheritence # when template class specialization is employed. 
#
#
# TODO: document usage of this file
#
from __future__ import division

import os
import re

import ply.lex as lex
import ply.yacc as yacc

try:
    from collections import OrderedDict
except ImportError:
    from ordereddict import OrderedDict

# Module-level parser state.  PLY rule callbacks receive only the production
# object, so the lexer and bookkeeping tables are shared through globals.
lexer = None
scope_lineno = 0
identifier_lineno = {}
_parse_info = None
_parsedata = None
noExceptionLogic = True


def ply_init(data):
    """Store the source text that is about to be parsed."""
    global _parsedata
    _parsedata = data


class Scope(object):
    """A C++ scope (namespace/class/struct/union) discovered during parsing."""

    def __init__(self, name, abs_name, scope_t, base_classes, lineno):
        self.function = []              # list of (function-name, line-number)
        self.name = name                # unqualified name
        self.scope_t = scope_t          # "namespace", "class", "struct", ...
        self.sub_scopes = []
        self.base_classes = base_classes
        self.abs_name = abs_name        # fully qualified ("::"-joined) name
        self.lineno = lineno

    def insert(self, scope):
        self.sub_scopes.append(scope)


class CppInfo(object):
    """Collects scopes and the declared functions whose names match `filter`."""

    def __init__(self, filter=None):
        self.verbose = 0
        # By default only test-related functions are tracked.
        if filter is None:
            self.filter = re.compile("[Tt][Ee][Ss][Tt]|createSuite|destroySuite")
        else:
            self.filter = filter
        self.scopes = [""]
        self.index = OrderedDict()
        self.index[""] = Scope("", "::", "namespace", [], 1)
        self.function = []

    def push_scope(self, ns, scope_t, base_classes=None):
        """Enter a nested scope named `ns` of kind `scope_t`.

        BUG FIX: the original declared ``base_classes=[]``; a mutable default
        is evaluated once and shared across all calls.  Use None as sentinel.
        """
        if base_classes is None:
            base_classes = []
        name = self.scopes[-1] + "::" + ns
        if self.verbose >= 2:
            print("-- Starting " + scope_t + " " + name)
        self.scopes.append(name)
        self.index[name] = Scope(ns, name, scope_t, base_classes, scope_lineno - 1)

    def pop_scope(self):
        """Leave the current scope and return its absolute name."""
        scope = self.scopes.pop()
        if self.verbose >= 2:
            print("-- Stopping " + scope)
        return scope

    def add_function(self, fn):
        """Record function `fn` in the current scope if it matches the filter."""
        fn = str(fn)
        if self.filter.search(fn):
            self.index[self.scopes[-1]].function.append(
                (fn, identifier_lineno.get(fn, lexer.lineno - 1)))
            tmp = self.scopes[-1] + "::" + fn
            if self.verbose == 2:
                print("-- Function declaration " + fn + "  " + tmp)
            elif self.verbose == 1:
                print("-- Function declaration " + tmp)

    def get_functions(self, name, quiet=False):
        """Return functions declared in scope `name`, including inherited ones.

        BUG FIX: the original did ``fns = scope.function`` followed by
        ``fns += ...``, which appended base-class functions into the stored
        ``Scope.function`` list on every call (corrupting the index and
        duplicating results).  Work on a copy instead.
        """
        if name == "::":
            name = ""
        scope = self.index[name]
        fns = list(scope.function)
        for key in scope.base_classes:
            cname = self.find_class(key, scope)
            if cname is None:
                if not quiet:
                    print("Defined classes: " + str(list(self.index.keys())))
                    print("WARNING: Unknown class " + key)
            else:
                fns += self.get_functions(cname, quiet)
        return fns

    def find_class(self, name, scope):
        """Resolve class `name` relative to `scope`; return its index key or None."""
        if ':' in name:
            # Already (partially) qualified: only an exact match counts.
            if name in self.index:
                return name
            else:
                return None
        tmp = scope.abs_name.split(':')
        name1 = ":".join(tmp[:-1] + [name])   # sibling of `scope`
        if name1 in self.index:
            return name1
        name2 = "::" + name                   # top-level class
        if name2 in self.index:
            return name2
        return None

    def __repr__(self):
        return str(self)

    def is_baseclass(self, cls, base):
        '''Returns true if base is a base-class of cls'''
        if cls in self.index:
            bases = self.index[cls]
        elif "::" + cls in self.index:
            bases = self.index["::" + cls]
        else:
            return False
        if base in bases.base_classes:
            return True
        for name in bases.base_classes:
            if self.is_baseclass(name, base):
                return True
        return False

    def __str__(self):
        ans = ""
        keys = list(self.index.keys())
        keys.sort()
        for key in keys:
            scope = self.index[key]
            ans += scope.scope_t + " " + scope.abs_name + "\n"
            if scope.scope_t == "class":
                ans += "  Base Classes: " + str(scope.base_classes) + "\n"
                # BUG FIX: entries are (name, lineno) tuples; the original
                # concatenated them directly to str, raising TypeError.
                for fn in self.get_functions(scope.abs_name):
                    ans += "  " + str(fn) + "\n"
            else:
                for fn in scope.function:
                    ans += "  " + str(fn) + "\n"
        return ans


def flatten(x):
    """Flatten nested list"""
    try:
        strtypes = basestring
    except NameError:
        # Python 3 has no basestring; strings and bytes are the atoms.
        strtypes = (str, bytes)
    result = []
    for el in x:
        if hasattr(el, "__iter__") and not isinstance(el, strtypes):
            result.extend(flatten(el))
        else:
            result.append(el)
    return result


#
# The lexer (and/or a preprocessor) is expected to identify the following
#
# Punctuation:
#
#
literals = "+-*/%^&|~!<>=:()?.\'\"\\@$;,"

#
reserved = {
    'private' : 'PRIVATE',
    'protected' : 'PROTECTED',
    'public' : 'PUBLIC',

    'bool' : 'BOOL',
    'char' : 'CHAR',
    'double' : 'DOUBLE',
    'float' : 'FLOAT',
    'int' : 'INT',
    'long' : 'LONG',
    'short' : 'SHORT',
    'signed' : 'SIGNED',
    'unsigned' : 'UNSIGNED',
    'void' : 'VOID',
    'wchar_t' : 'WCHAR_T',

    'class' : 'CLASS',
    'enum' : 'ENUM',
    'namespace' : 'NAMESPACE',
    'struct' : 'STRUCT',
    'typename' : 'TYPENAME',
    'union' : 'UNION',

    'const' : 'CONST',
    'volatile' : 'VOLATILE',

    'auto' : 'AUTO',
    'explicit' : 'EXPLICIT',
    'export' : 'EXPORT',
    'extern' : 'EXTERN',
    '__extension__' : 'EXTENSION',
    'friend' : 'FRIEND',
    'inline' : 'INLINE',
    'mutable' : 'MUTABLE',
    'register' : 'REGISTER',
    'static' : 'STATIC',
    'template' : 'TEMPLATE',
    'typedef' : 'TYPEDEF',
    'using' : 'USING',
    'virtual' : 'VIRTUAL',

    'asm' : 'ASM',
    'break' : 'BREAK',
    'case' : 'CASE',
    'catch' : 'CATCH',
    'const_cast' : 'CONST_CAST',
    'continue' : 'CONTINUE',
    'default' : 'DEFAULT',
    'delete' : 'DELETE',
    'do' : 'DO',
    'dynamic_cast' : 'DYNAMIC_CAST',
    'else' : 'ELSE',
    'false' : 'FALSE',
    'for' : 'FOR',
    'goto' : 'GOTO',
    'if' : 'IF',
    'new' : 'NEW',
    'operator' : 'OPERATOR',
    'reinterpret_cast' : 'REINTERPRET_CAST',
    'return' : 'RETURN',
    'sizeof' : 'SIZEOF',
    'static_cast' : 'STATIC_CAST',
    'switch' : 'SWITCH',
    'this' : 'THIS',
    'throw' : 'THROW',
    'true' : 'TRUE',
    'try' : 'TRY',
    'typeid' : 'TYPEID',
    'while' : 'WHILE',
    '"C"' : 'CLiteral',
    '"C++"' : 'CppLiteral',

    '__attribute__' : 'ATTRIBUTE',
    '__cdecl__' : 'CDECL',
    '__typeof' : 'uTYPEOF',
    'typeof' : 'TYPEOF',

    'CXXTEST_STD' : 'CXXTEST_STD',
}

tokens = [
    "CharacterLiteral",
    "FloatingLiteral",
    "Identifier",
    "IntegerLiteral",
    "StringLiteral",
    "RBRACE",
    "LBRACE",
    "RBRACKET",
    "LBRACKET",
    "ARROW",
    "ARROW_STAR",
    "DEC",
    "EQ",
    "GE",
    "INC",
    "LE",
    "LOG_AND",
    "LOG_OR",
    "NE",
    "SHL",
    "SHR",
    "ASS_ADD",
    "ASS_AND",
    "ASS_DIV",
    "ASS_MOD",
    "ASS_MUL",
    "ASS_OR",
    "ASS_SHL",
    "ASS_SHR",
    "ASS_SUB",
    "ASS_XOR",
    "DOT_STAR",
    "ELLIPSIS",
    "SCOPE",
] + list(reserved.values())

t_ignore = " \t\r"

t_LBRACE = r"(\{)|(<%)"
t_RBRACE = r"(\})|(%>)"
t_LBRACKET = r"(\[)|(<:)"
t_RBRACKET = r"(\])|(:>)"
t_ARROW = r"->"
t_ARROW_STAR = r"->\*"
t_DEC = r"--"
t_EQ = r"=="
t_GE = r">="
t_INC = r"\+\+"
t_LE = r"<="
t_LOG_AND = r"&&"
t_LOG_OR = r"\|\|"
t_NE = r"!="
t_SHL = r"<<"
t_SHR = r">>"
t_ASS_ADD = r"\+="
t_ASS_AND = r"&="
t_ASS_DIV = r"/="
t_ASS_MOD = r"%="
t_ASS_MUL = r"\*="
t_ASS_OR = r"\|="
t_ASS_SHL = r"<<="
t_ASS_SHR = r">>="
t_ASS_SUB = r"-="
t_ASS_XOR = r"^="
t_DOT_STAR = r"\.\*"
t_ELLIPSIS = r"\.\.\."
t_SCOPE = r"::"


# Discard comments
def t_COMMENT(t):
    r'(/\*(.|\n)*?\*/)|(//.*?\n)|(\#.*?\n)'
    # Keep the line counter accurate across multi-line comments.
    t.lexer.lineno += t.value.count("\n")


t_IntegerLiteral = r'(0x[0-9A-F]+)|([0-9]+(L){0,1})'
t_FloatingLiteral = r"[0-9]+[eE\.\+-]+[eE\.\+\-0-9]+"
t_CharacterLiteral = r'\'([^\'\\]|\\.)*\''
#t_StringLiteral = r'"([^"\\]|\\.)*"'


def t_StringLiteral(t):
    r'"([^"\\]|\\.)*"'
    # '"C"' / '"C++"' (from extern "C" blocks) are treated as keywords.
    t.type = reserved.get(t.value, 'StringLiteral')
    return t


def t_Identifier(t):
    r"[a-zA-Z_][a-zA-Z_0-9\.]*"
    t.type = reserved.get(t.value, 'Identifier')
    return t


def t_error(t):
    # FIX: use the print() function; the original Python-2 print statement is
    # a syntax error under Python 3 and made the module unimportable there.
    print("Illegal character '%s'" % t.value[0])
    #raise IOError, "Parse error"
    #t.lexer.skip()


def t_newline(t):
    r'[\n]+'
    t.lexer.lineno += len(t.value)


precedence = (
    ('right', 'SHIFT_THERE', 'REDUCE_HERE_MOSTLY', 'SCOPE'),
    ('nonassoc', 'ELSE', 'INC', 'DEC', '+', '-', '*', '&',
     'LBRACKET', 'LBRACE', '<', ':', ')'),
)

start = 'translation_unit'

#
# The %prec resolves the 14.2-3 ambiguity:
# Identifier '<' is forced to go through the is-it-a-template-name test
# All names absorb TEMPLATE with the name, so that no template_test is
# performed for them.  This requires all potential declarations within an
# expression to perpetuate this policy and thereby guarantee the ultimate
# coverage of explicit_instantiation.
#
# The %prec also resolves a conflict in identifier : which is forced to be a
# shift of a label for a labeled-statement rather than a reduction for the
# name of a bit-field or generalised constructor.  This is pretty dubious
# syntactically but correct for all semantic possibilities.  The shift is
# only activated when the ambiguity exists at the start of a statement.
# In this context a bit-field declaration or constructor definition are not
# allowed.
#
#
def p_identifier(p):
    '''identifier : Identifier
    |               CXXTEST_STD '(' Identifier ')'
    '''
    # Remember the line of names that might be tests/suite hooks.
    if p[1][0] in ('t', 'T', 'c', 'd'):
        identifier_lineno[p[1]] = p.lineno(1)
    p[0] = p[1]


def p_id(p):
    '''id : identifier %prec SHIFT_THERE
    |       template_decl
    |       TEMPLATE id
    '''
    p[0] = get_rest(p)


def p_global_scope(p):
    '''global_scope : SCOPE
    '''
    p[0] = get_rest(p)


def p_id_scope(p):
    '''id_scope : id SCOPE'''
    p[0] = get_rest(p)


def p_id_scope_seq(p):
    '''id_scope_seq : id_scope
    |                 id_scope id_scope_seq
    '''
    p[0] = get_rest(p)


#
# A :: B :: C; is ambiguous: how much is type and how much name?
# The %prec maximises the (type) length, which is the 7.1-2 semantic
# constraint.
#
def p_nested_id(p):
    '''nested_id : id %prec SHIFT_THERE
    |              id_scope nested_id
    '''
    p[0] = get_rest(p)


def p_scoped_id(p):
    '''scoped_id : nested_id
    |              global_scope nested_id
    |              id_scope_seq
    |              global_scope id_scope_seq
    '''
    global scope_lineno
    scope_lineno = lexer.lineno
    data = flatten(get_rest(p))
    if data[0] != None:
        p[0] = "".join(data)


#
# destructor_id has to be held back to avoid a conflict with a one's
# complement as per 5.3.1-9.  It gets put back only when scoped or in a
# declarator_id, which is only used as an explicit member name.
# Declarations of an unscoped destructor are always parsed as a one's
# complement.
#
def p_destructor_id(p):
    '''destructor_id : '~' id
    |                  TEMPLATE destructor_id
    '''
    p[0] = get_rest(p)


#def p_template_id(p):
#    '''template_id : empty
#    |                TEMPLATE
#    '''
#    pass


def p_template_decl(p):
    '''template_decl : identifier '<' nonlgt_seq_opt '>'
    '''
    #
    # WEH: should we include the lt/gt symbols to indicate that this is a
    # template class?  How is that going to be used later???
    #
    #p[0] = [p[1], "<", ">"]
    p[0] = p[1]


def p_special_function_id(p):
    '''special_function_id : conversion_function_id
    |                        operator_function_id
    |                        TEMPLATE special_function_id
    '''
    p[0] = get_rest(p)


def p_nested_special_function_id(p):
    '''nested_special_function_id : special_function_id
    |                               id_scope destructor_id
    |                               id_scope nested_special_function_id
    '''
    p[0] = get_rest(p)


def p_scoped_special_function_id(p):
    '''scoped_special_function_id : nested_special_function_id
    |                               global_scope nested_special_function_id
    '''
    p[0] = get_rest(p)


# declarator-id is all names in all scopes, except reserved words.
def p_declarator_id(p):
    '''declarator_id : scoped_id
    |                  scoped_special_function_id
    |                  destructor_id
    '''
    p[0] = p[1]


#
# The standard defines pseudo-destructors in terms of type-name, which is
# class/enum/typedef, of which class-name is covered by a normal destructor.
# pseudo-destructors are supposed to support ~int() in templates, so the
# grammar here covers built-in names.  Other names are covered by the lack
# of identifier/type discrimination.
#
def p_built_in_type_id(p):
    '''built_in_type_id : built_in_type_specifier
    |                     built_in_type_id built_in_type_specifier
    '''
    pass


def p_pseudo_destructor_id(p):
    '''pseudo_destructor_id : built_in_type_id SCOPE '~' built_in_type_id
    |                         '~' built_in_type_id
    |                         TEMPLATE pseudo_destructor_id
    '''
    pass


def p_nested_pseudo_destructor_id(p):
    '''nested_pseudo_destructor_id : pseudo_destructor_id
    |                                id_scope nested_pseudo_destructor_id
    '''
    pass


def p_scoped_pseudo_destructor_id(p):
    '''scoped_pseudo_destructor_id : nested_pseudo_destructor_id
    |                                global_scope scoped_pseudo_destructor_id
    '''
    pass


#-------------------------------------------------------------------------------
# A.2 Lexical conventions
#-------------------------------------------------------------------------------
#
def p_literal(p):
    '''literal : IntegerLiteral
    |            CharacterLiteral
    |            FloatingLiteral
    |            StringLiteral
    |            TRUE
    |            FALSE
    '''
    pass


#-------------------------------------------------------------------------------
# A.3 Basic concepts
#-------------------------------------------------------------------------------
def p_translation_unit(p):
    '''translation_unit : declaration_seq_opt
    '''
    pass


#-------------------------------------------------------------------------------
# A.4 Expressions
#-------------------------------------------------------------------------------
#
# primary_expression covers an arbitrary sequence of all names with the
# exception of an unscoped destructor, which is parsed as its unary expression
# which is the correct disambiguation (when ambiguous).  This eliminates the
# traditional A(B) meaning A B ambiguity, since we never have to tack an A
# onto the front of something that might start with (.  The name length got
# maximised ab initio.  The downside is that semantic interpretation must split
# the names up again.
#
# Unification of the declaration and expression syntax means that unary and
# binary pointer declarator operators:
#     int * * name
# are parsed as binary and unary arithmetic operators (int) * (*name).  Since
# type information is not used
# ambiguities resulting from a cast
#     (cast)*(value)
# are resolved to favour the binary rather than the cast unary to ease AST
# clean-up.  The cast-call ambiguity must be resolved to the cast to ensure
# that (a)(b)c can be parsed.
#
# The problem of the functional cast ambiguity
#     name(arg)
# as call or declaration is avoided by maximising the name within the parsing
# kernel.  So primary_id_expression picks up
#     extern long int const var = 5;
# as an assignment to the syntax parsed as "extern long int const var".  The
# presence of two names is parsed so that "extern long into const" is
# distinguished from "var" considerably simplifying subsequent
# semantic resolution.
#
# The generalised name is a concatenation of potential type-names (scoped
# identifiers or built-in sequences) plus optionally one of the special names
# such as an operator-function-id, conversion-function-id or destructor as the
# final name.
#

def get_rest(p):
    """Return the production's symbol values p[1]..p[len-1] as a plain list."""
    return [p[i] for i in range(1, len(p))]


def p_primary_expression(p):
    '''primary_expression : literal
    |                       THIS
    |                       suffix_decl_specified_ids
    |                       abstract_expression %prec REDUCE_HERE_MOSTLY
    '''
    p[0] = get_rest(p)


#
# Abstract-expression covers the () and [] of abstract-declarators.
#
def p_abstract_expression(p):
    '''abstract_expression : parenthesis_clause
    |                        LBRACKET bexpression_opt RBRACKET
    |                        TEMPLATE abstract_expression
    '''
    pass


def p_postfix_expression(p):
    '''postfix_expression : primary_expression
    |                       postfix_expression parenthesis_clause
    |                       postfix_expression LBRACKET bexpression_opt RBRACKET
    |                       postfix_expression LBRACKET bexpression_opt RBRACKET attributes
    |                       postfix_expression '.' declarator_id
    |                       postfix_expression '.' scoped_pseudo_destructor_id
    |                       postfix_expression ARROW declarator_id
    |                       postfix_expression ARROW scoped_pseudo_destructor_id
    |                       postfix_expression INC
    |                       postfix_expression DEC
    |                       DYNAMIC_CAST '<' nonlgt_seq_opt '>' '(' expression ')'
    |                       STATIC_CAST '<' nonlgt_seq_opt '>' '(' expression ')'
    |                       REINTERPRET_CAST '<' nonlgt_seq_opt '>' '(' expression ')'
    |                       CONST_CAST '<' nonlgt_seq_opt '>' '(' expression ')'
    |                       TYPEID parameters_clause
    '''
    #print "HERE",str(p[1])
    p[0] = get_rest(p)


def p_bexpression_opt(p):
    '''bexpression_opt : empty
    |                    bexpression
    '''
    pass


def p_bexpression(p):
    '''bexpression : nonbracket_seq
    |                nonbracket_seq bexpression_seq bexpression_clause nonbracket_seq_opt
    |                bexpression_seq bexpression_clause nonbracket_seq_opt
    '''
    pass


def p_bexpression_seq(p):
    '''bexpression_seq : empty
    |                    bexpression_seq bexpression_clause nonbracket_seq_opt
    '''
    pass


def p_bexpression_clause(p):
    '''bexpression_clause : LBRACKET bexpression_opt RBRACKET
    '''
    pass


def p_expression_list_opt(p):
    '''expression_list_opt : empty
    |                        expression_list
    '''
    pass


def p_expression_list(p):
    '''expression_list : assignment_expression
    |                    expression_list ',' assignment_expression
    '''
    pass


def p_unary_expression(p):
    '''unary_expression : postfix_expression
    |                     INC cast_expression
    |                     DEC cast_expression
    |                     ptr_operator cast_expression
    |                     suffix_decl_specified_scope star_ptr_operator cast_expression
    |                     '+' cast_expression
    |                     '-' cast_expression
    |                     '!' cast_expression
    |                     '~' cast_expression
    |                     SIZEOF unary_expression
    |                     new_expression
    |                     global_scope new_expression
    |                     delete_expression
    |                     global_scope delete_expression
    '''
    p[0] = get_rest(p)


def p_delete_expression(p):
    '''delete_expression : DELETE cast_expression
    '''
    pass


def p_new_expression(p):
    '''new_expression : NEW new_type_id new_initializer_opt
    |                   NEW parameters_clause new_type_id new_initializer_opt
    |                   NEW parameters_clause
    |                   NEW parameters_clause parameters_clause new_initializer_opt
    '''
    pass


def p_new_type_id(p):
    '''new_type_id : type_specifier ptr_operator_seq_opt
    |                type_specifier new_declarator
    |                type_specifier new_type_id
    '''
    pass


def p_new_declarator(p):
    '''new_declarator : ptr_operator new_declarator
    |                   direct_new_declarator
    '''
    pass


def p_direct_new_declarator(p):
    '''direct_new_declarator : LBRACKET bexpression_opt RBRACKET
    |                          direct_new_declarator LBRACKET bexpression RBRACKET
    '''
    pass


def p_new_initializer_opt(p):
    '''new_initializer_opt : empty
    |                        '(' expression_list_opt ')'
    '''
    pass


#
# cast-expression is generalised to support a [] as well as a () prefix.  This
# covers the omission of DELETE[] which when followed by a parenthesised
# expression was ambiguous.  It also covers the gcc indexed array
# initialisation for free.
#
#
def p_cast_expression(p):
    '''cast_expression : unary_expression
    |                    abstract_expression cast_expression
    '''
    p[0] = get_rest(p)


def p_pm_expression(p):
    '''pm_expression : cast_expression
    |                  pm_expression DOT_STAR cast_expression
    |                  pm_expression ARROW_STAR cast_expression
    '''
    p[0] = get_rest(p)


def p_multiplicative_expression(p):
    '''multiplicative_expression : pm_expression
    |                              multiplicative_expression star_ptr_operator pm_expression
    |                              multiplicative_expression '/' pm_expression
    |                              multiplicative_expression '%' pm_expression
    '''
    p[0] = get_rest(p)


def p_additive_expression(p):
    '''additive_expression : multiplicative_expression
    |                        additive_expression '+' multiplicative_expression
    |                        additive_expression '-' multiplicative_expression
    '''
    p[0] = get_rest(p)


def p_shift_expression(p):
    '''shift_expression : additive_expression
    |                     shift_expression SHL additive_expression
    |                     shift_expression SHR additive_expression
    '''
    p[0] = get_rest(p)


#    | relational_expression '<' shift_expression
#    | relational_expression '>' shift_expression
#    | relational_expression LE shift_expression
#    | relational_expression GE shift_expression
def p_relational_expression(p):
    '''relational_expression : shift_expression
    '''
    p[0] = get_rest(p)


def p_equality_expression(p):
    '''equality_expression : relational_expression
    |                        equality_expression EQ relational_expression
    |                        equality_expression NE relational_expression
    '''
    p[0] = get_rest(p)


def p_and_expression(p):
    '''and_expression : equality_expression
    |                   and_expression '&' equality_expression
    '''
    p[0] = get_rest(p)


def p_exclusive_or_expression(p):
    '''exclusive_or_expression : and_expression
    |                            exclusive_or_expression '^' and_expression
    '''
    p[0] = get_rest(p)


def p_inclusive_or_expression(p):
    '''inclusive_or_expression : exclusive_or_expression
    |                            inclusive_or_expression '|' exclusive_or_expression
    '''
    p[0] = get_rest(p)


def p_logical_and_expression(p):
    '''logical_and_expression : inclusive_or_expression
    |                           logical_and_expression LOG_AND inclusive_or_expression
    '''
    p[0] = get_rest(p)


def p_logical_or_expression(p):
    '''logical_or_expression : logical_and_expression
    |                         logical_or_expression LOG_OR logical_and_expression
    '''
    p[0] = get_rest(p)


def p_conditional_expression(p):
    '''conditional_expression : logical_or_expression
    |                           logical_or_expression '?' expression ':' assignment_expression
    '''
    p[0] = get_rest(p)


#
# assignment-expression is generalised to cover the simple assignment of a
# braced initializer in order to contribute to the coverage of
# parameter-declaration and init-declaration.
#
#    | logical_or_expression assignment_operator assignment_expression
def p_assignment_expression(p):
    '''assignment_expression : conditional_expression
    |                          logical_or_expression assignment_operator nonsemicolon_seq
    |                          logical_or_expression '=' braced_initializer
    |                          throw_expression
    '''
    p[0] = get_rest(p)


def p_assignment_operator(p):
    '''assignment_operator : '='
    | ASS_ADD
    | ASS_AND
    | ASS_DIV
    | ASS_MOD
    | ASS_MUL
    | ASS_OR
    | ASS_SHL
    | ASS_SHR
    | ASS_SUB
    | ASS_XOR
    '''
    pass


#
# expression is widely used and usually single-element, so the reductions are
# arranged so that a single-element expression is returned as is.  Multi-element
# expressions are parsed as a list that may then behave polymorphically as an
# element or be compacted to an element.
#
def p_expression(p):
    '''expression : assignment_expression
    |               expression_list ',' assignment_expression
    '''
    p[0] = get_rest(p)


def p_constant_expression(p):
    '''constant_expression : conditional_expression
    '''
    pass


#---------------------------------------------------------------------------------------------------
# A.5 Statements
#---------------------------------------------------------------------------------------------------
# Parsing statements is easy once simple_declaration has been generalised to
# cover expression_statement.
#
#
# The use of extern here is a hack.
# The 'extern "C" {}' block gets parsed
# as a function, so when nested 'extern "C"' declarations exist, they don't
# work because the block is viewed as a list of statements... :(
#
def p_statement(p):
    '''statement : compound_statement
    |              declaration_statement
    |              try_block
    |              labeled_statement
    |              selection_statement
    |              iteration_statement
    |              jump_statement
    '''
    pass


def p_compound_statement(p):
    '''compound_statement : LBRACE statement_seq_opt RBRACE
    '''
    pass


def p_statement_seq_opt(p):
    '''statement_seq_opt : empty
    |                      statement_seq_opt statement
    '''
    pass


#
# The dangling else conflict is resolved to the innermost if.
#
def p_selection_statement(p):
    '''selection_statement : IF '(' condition ')' statement %prec SHIFT_THERE
    |                        IF '(' condition ')' statement ELSE statement
    |                        SWITCH '(' condition ')' statement
    '''
    pass


def p_condition_opt(p):
    '''condition_opt : empty
    |                  condition
    '''
    pass


def p_condition(p):
    '''condition : nonparen_seq
    |              nonparen_seq condition_seq parameters_clause nonparen_seq_opt
    |              condition_seq parameters_clause nonparen_seq_opt
    '''
    pass


def p_condition_seq(p):
    '''condition_seq : empty
    |                  condition_seq parameters_clause nonparen_seq_opt
    '''
    pass


def p_labeled_statement(p):
    '''labeled_statement : identifier ':' statement
    |                      CASE constant_expression ':' statement
    |                      DEFAULT ':' statement
    '''
    pass


def p_try_block(p):
    '''try_block : TRY compound_statement handler_seq
    '''
    # Seeing a try/catch means the parsed code relies on exception logic.
    global noExceptionLogic
    noExceptionLogic = False


def p_jump_statement(p):
    '''jump_statement : BREAK ';'
    |                   CONTINUE ';'
    |                   RETURN nonsemicolon_seq ';'
    |                   GOTO identifier ';'
    '''
    pass


def p_iteration_statement(p):
    '''iteration_statement : WHILE '(' condition ')' statement
    |                        DO statement WHILE '(' expression ')' ';'
    |                        FOR '(' nonparen_seq_opt ')' statement
    '''
    pass


def p_declaration_statement(p):
    '''declaration_statement : block_declaration
    '''
    pass


#---------------------------------------------------------------------------------------------------
# A.6 Declarations
#--------------------------------------------------------------------------------------------------- def p_compound_declaration(p): '''compound_declaration : LBRACE declaration_seq_opt RBRACE ''' pass def p_declaration_seq_opt(p): '''declaration_seq_opt : empty | declaration_seq_opt declaration ''' pass def p_declaration(p): '''declaration : block_declaration | function_definition | template_declaration | explicit_specialization | specialised_declaration ''' pass def p_specialised_declaration(p): '''specialised_declaration : linkage_specification | namespace_definition | TEMPLATE specialised_declaration ''' pass def p_block_declaration(p): '''block_declaration : simple_declaration | specialised_block_declaration ''' pass def p_specialised_block_declaration(p): '''specialised_block_declaration : asm_definition | namespace_alias_definition | using_declaration | using_directive | TEMPLATE specialised_block_declaration ''' pass def p_simple_declaration(p): '''simple_declaration : ';' | init_declaration ';' | init_declarations ';' | decl_specifier_prefix simple_declaration ''' global _parse_info if len(p) == 3: if p[2] == ";": decl = p[1] else: decl = p[2] if decl is not None: fp = flatten(decl) if len(fp) >= 2 and fp[0] is not None and fp[0]!="operator" and fp[1] == '(': p[0] = fp[0] _parse_info.add_function(fp[0]) # # A decl-specifier following a ptr_operator provokes a shift-reduce conflict for * const name which is resolved in favour of the pointer, and implemented by providing versions of decl-specifier guaranteed not to start with a cv_qualifier. decl-specifiers are implemented type-centrically. That is the semantic constraint that there must be a type is exploited to impose structure, but actually eliminate very little syntax. built-in types are multi-name and so need a different policy. # # non-type decl-specifiers are bound to the left-most type in a decl-specifier-seq, by parsing from the right and attaching suffixes to the right-hand type. 
# Finally residual prefixes attach to the left.
#
def p_suffix_built_in_decl_specifier_raw(p):
    '''suffix_built_in_decl_specifier_raw : built_in_type_specifier
                                          | suffix_built_in_decl_specifier_raw built_in_type_specifier
                                          | suffix_built_in_decl_specifier_raw decl_specifier_suffix
    '''
    pass

def p_suffix_built_in_decl_specifier(p):
    '''suffix_built_in_decl_specifier : suffix_built_in_decl_specifier_raw
                                      | TEMPLATE suffix_built_in_decl_specifier
    '''
    pass

#                                | id_scope_seq
#                                | SCOPE id_scope_seq
def p_suffix_named_decl_specifier(p):
    '''suffix_named_decl_specifier : scoped_id
                                   | elaborate_type_specifier
                                   | suffix_named_decl_specifier decl_specifier_suffix
    '''
    p[0]=get_rest(p)

def p_suffix_named_decl_specifier_bi(p):
    '''suffix_named_decl_specifier_bi : suffix_named_decl_specifier
                                      | suffix_named_decl_specifier suffix_built_in_decl_specifier_raw
    '''
    p[0] = get_rest(p)
    #print "HERE",get_rest(p)

def p_suffix_named_decl_specifiers(p):
    '''suffix_named_decl_specifiers : suffix_named_decl_specifier_bi
                                    | suffix_named_decl_specifiers suffix_named_decl_specifier_bi
    '''
    p[0] = get_rest(p)

def p_suffix_named_decl_specifiers_sf(p):
    '''suffix_named_decl_specifiers_sf : scoped_special_function_id
                                       | suffix_named_decl_specifiers
                                       | suffix_named_decl_specifiers scoped_special_function_id
    '''
    #print "HERE",get_rest(p)
    p[0] = get_rest(p)

def p_suffix_decl_specified_ids(p):
    '''suffix_decl_specified_ids : suffix_built_in_decl_specifier
                                 | suffix_built_in_decl_specifier suffix_named_decl_specifiers_sf
                                 | suffix_named_decl_specifiers_sf
    '''
    # Prefer the named specifier (the declarator id) over a built-in prefix.
    if len(p) == 3:
        p[0] = p[2]
    else:
        p[0] = p[1]

def p_suffix_decl_specified_scope(p):
    '''suffix_decl_specified_scope : suffix_named_decl_specifiers SCOPE
                                   | suffix_built_in_decl_specifier suffix_named_decl_specifiers SCOPE
                                   | suffix_built_in_decl_specifier SCOPE
    '''
    p[0] = get_rest(p)

def p_decl_specifier_affix(p):
    '''decl_specifier_affix : storage_class_specifier
                            | function_specifier
                            | FRIEND
                            | TYPEDEF
                            | cv_qualifier
    '''
    pass

def p_decl_specifier_suffix(p):
    '''decl_specifier_suffix : decl_specifier_affix
    '''
    pass

def p_decl_specifier_prefix(p):
    '''decl_specifier_prefix : decl_specifier_affix
                             | TEMPLATE decl_specifier_prefix
    '''
    pass

def p_storage_class_specifier(p):
    '''storage_class_specifier : REGISTER
                               | STATIC
                               | MUTABLE
                               | EXTERN %prec SHIFT_THERE
                               | EXTENSION
                               | AUTO
    '''
    pass

def p_function_specifier(p):
    '''function_specifier : EXPLICIT
                          | INLINE
                          | VIRTUAL
    '''
    pass

def p_type_specifier(p):
    '''type_specifier : simple_type_specifier
                      | elaborate_type_specifier
                      | cv_qualifier
    '''
    pass

def p_elaborate_type_specifier(p):
    '''elaborate_type_specifier : class_specifier
                                | enum_specifier
                                | elaborated_type_specifier
                                | TEMPLATE elaborate_type_specifier
    '''
    pass

def p_simple_type_specifier(p):
    '''simple_type_specifier : scoped_id
                             | scoped_id attributes
                             | built_in_type_specifier
    '''
    p[0] = p[1]

def p_built_in_type_specifier(p):
    '''built_in_type_specifier : Xbuilt_in_type_specifier
                               | Xbuilt_in_type_specifier attributes
    '''
    pass

def p_attributes(p):
    '''attributes : attribute
                  | attributes attribute
    '''
    pass

def p_attribute(p):
    '''attribute : ATTRIBUTE '(' parameters_clause ')'
    '''

def p_Xbuilt_in_type_specifier(p):
    '''Xbuilt_in_type_specifier : CHAR
                                | WCHAR_T
                                | BOOL
                                | SHORT
                                | INT
                                | LONG
                                | SIGNED
                                | UNSIGNED
                                | FLOAT
                                | DOUBLE
                                | VOID
                                | uTYPEOF parameters_clause
                                | TYPEOF parameters_clause
    '''
    pass

#
# The over-general use of declaration_expression to cover decl-specifier-seq_opt declarator in a function-definition means that
#     class X { };
# could be a function-definition or a class-specifier.
#     enum X { };
# could be a function-definition or an enum-specifier.
# The function-definition is not syntactically valid so resolving the false conflict in favour of the
# elaborated_type_specifier is correct.
#
def p_elaborated_type_specifier(p):
    '''elaborated_type_specifier : class_key scoped_id %prec SHIFT_THERE
                                 | elaborated_enum_specifier
                                 | TYPENAME scoped_id
    '''
    pass

def p_elaborated_enum_specifier(p):
    '''elaborated_enum_specifier : ENUM scoped_id %prec SHIFT_THERE
    '''
    pass

def p_enum_specifier(p):
    '''enum_specifier : ENUM scoped_id enumerator_clause
                      | ENUM enumerator_clause
    '''
    pass

def p_enumerator_clause(p):
    '''enumerator_clause : LBRACE enumerator_list_ecarb
                         | LBRACE enumerator_list enumerator_list_ecarb
                         | LBRACE enumerator_list ',' enumerator_definition_ecarb
    '''
    pass

def p_enumerator_list_ecarb(p):
    '''enumerator_list_ecarb : RBRACE
    '''
    pass

def p_enumerator_definition_ecarb(p):
    '''enumerator_definition_ecarb : RBRACE
    '''
    pass

def p_enumerator_definition_filler(p):
    '''enumerator_definition_filler : empty
    '''
    pass

def p_enumerator_list_head(p):
    '''enumerator_list_head : enumerator_definition_filler
                            | enumerator_list ',' enumerator_definition_filler
    '''
    pass

def p_enumerator_list(p):
    '''enumerator_list : enumerator_list_head enumerator_definition
    '''
    pass

def p_enumerator_definition(p):
    '''enumerator_definition : enumerator
                             | enumerator '=' constant_expression
    '''
    pass

def p_enumerator(p):
    '''enumerator : identifier
    '''
    pass

def p_namespace_definition(p):
    '''namespace_definition : NAMESPACE scoped_id push_scope compound_declaration
                            | NAMESPACE push_scope compound_declaration
    '''
    # Close the namespace scope that the embedded push_scope action opened.
    global _parse_info
    scope = _parse_info.pop_scope()

def p_namespace_alias_definition(p):
    '''namespace_alias_definition : NAMESPACE scoped_id '=' scoped_id ';'
    '''
    pass

def p_push_scope(p):
    '''push_scope : empty'''
    # Embedded mid-rule action: open a namespace scope.  p[-2]/p[-1] peek at
    # the symbols already shifted for the enclosing namespace_definition rule,
    # so p[-1] is the namespace name when one was given.
    global _parse_info
    if p[-2] == "namespace":
        scope=p[-1]
    else:
        scope=""
    _parse_info.push_scope(scope,"namespace")

def p_using_declaration(p):
    '''using_declaration : USING declarator_id ';'
                         | USING TYPENAME declarator_id ';'
    '''
    pass

def p_using_directive(p):
    '''using_directive : USING NAMESPACE scoped_id ';'
    '''
    pass

#    '''asm_definition : ASM '(' StringLiteral ')' ';'
def p_asm_definition(p):
    '''asm_definition : ASM '(' nonparen_seq_opt ')' ';'
    '''
    pass

def p_linkage_specification(p):
    '''linkage_specification : EXTERN CLiteral declaration
                             | EXTERN CLiteral compound_declaration
                             | EXTERN CppLiteral declaration
                             | EXTERN CppLiteral compound_declaration
    '''
    pass

#---------------------------------------------------------------------------------------------------
# A.7 Declarators
#---------------------------------------------------------------------------------------------------
#
# init-declarator is named init_declaration to reflect the embedded decl-specifier-seq_opt
#
def p_init_declarations(p):
    '''init_declarations : assignment_expression ',' init_declaration
                         | init_declarations ',' init_declaration
    '''
    p[0]=get_rest(p)

def p_init_declaration(p):
    '''init_declaration : assignment_expression
    '''
    p[0]=get_rest(p)

def p_star_ptr_operator(p):
    '''star_ptr_operator : '*'
                         | star_ptr_operator cv_qualifier
    '''
    pass

def p_nested_ptr_operator(p):
    '''nested_ptr_operator : star_ptr_operator
                           | id_scope nested_ptr_operator
    '''
    pass

def p_ptr_operator(p):
    '''ptr_operator : '&'
                    | nested_ptr_operator
                    | global_scope nested_ptr_operator
    '''
    pass

def p_ptr_operator_seq(p):
    '''ptr_operator_seq : ptr_operator
                        | ptr_operator ptr_operator_seq
    '''
    pass

#
# Independently coded to localise the shift-reduce conflict: sharing just needs another %prec
#
def p_ptr_operator_seq_opt(p):
    '''ptr_operator_seq_opt : empty %prec SHIFT_THERE
                            | ptr_operator ptr_operator_seq_opt
    '''
    pass

def p_cv_qualifier_seq_opt(p):
    '''cv_qualifier_seq_opt : empty
                            | cv_qualifier_seq_opt cv_qualifier
    '''
    pass

# TODO: verify that we should include attributes here
def p_cv_qualifier(p):
    '''cv_qualifier : CONST
                    | VOLATILE
                    | attributes
    '''
    pass

def p_type_id(p):
    '''type_id : type_specifier abstract_declarator_opt
               | type_specifier type_id
    '''
    pass

def p_abstract_declarator_opt(p):
    '''abstract_declarator_opt : empty
                               | ptr_operator abstract_declarator_opt
                               | direct_abstract_declarator
    '''
    pass

def p_direct_abstract_declarator_opt(p):
    '''direct_abstract_declarator_opt : empty
                                      | direct_abstract_declarator
    '''
    pass

def p_direct_abstract_declarator(p):
    '''direct_abstract_declarator : direct_abstract_declarator_opt parenthesis_clause
                                  | direct_abstract_declarator_opt LBRACKET RBRACKET
                                  | direct_abstract_declarator_opt LBRACKET bexpression RBRACKET
    '''
    pass

def p_parenthesis_clause(p):
    '''parenthesis_clause : parameters_clause cv_qualifier_seq_opt
                          | parameters_clause cv_qualifier_seq_opt exception_specification
    '''
    # Parameter details are not needed downstream; collapse to a '(' ')' marker.
    p[0] = ['(',')']

def p_parameters_clause(p):
    '''parameters_clause : '(' condition_opt ')'
    '''
    p[0] = ['(',')']

#
# A typed abstract qualifier such as
#     Class * ...
# looks like a multiply, so pointers are parsed as their binary operation equivalents that
# ultimately terminate with a degenerate right hand term.
#
def p_abstract_pointer_declaration(p):
    '''abstract_pointer_declaration : ptr_operator_seq
                                    | multiplicative_expression star_ptr_operator ptr_operator_seq_opt
    '''
    pass

def p_abstract_parameter_declaration(p):
    '''abstract_parameter_declaration : abstract_pointer_declaration
                                      | and_expression '&'
                                      | and_expression '&' abstract_pointer_declaration
    '''
    pass

def p_special_parameter_declaration(p):
    '''special_parameter_declaration : abstract_parameter_declaration
                                     | abstract_parameter_declaration '=' assignment_expression
                                     | ELLIPSIS
    '''
    pass

def p_parameter_declaration(p):
    '''parameter_declaration : assignment_expression
                             | special_parameter_declaration
                             | decl_specifier_prefix parameter_declaration
    '''
    pass

#
# function_definition includes constructor, destructor, implicit int definitions too.
# A local destructor is successfully parsed as a function-declaration but the ~ was
# treated as a unary operator.  constructor_head is the prefix ambiguity between a
# constructor and a member-init-list starting with a bit-field.
#
def p_function_definition(p):
    '''function_definition : ctor_definition
                           | func_definition
    '''
    pass

def p_func_definition(p):
    '''func_definition : assignment_expression function_try_block
                       | assignment_expression function_body
                       | decl_specifier_prefix func_definition
    '''
    # When p[2] is a function body ('{' marker), p[1] holds the declarator;
    # dig the function name out of its flattened form and register it.
    global _parse_info
    if p[2] is not None and p[2][0] == '{':
        decl = flatten(p[1])
        #print "HERE",decl
        if decl[-1] == ')':
            # "... name ( )" - the name sits just before the paren markers.
            decl=decl[-3]
        else:
            decl=decl[-1]
        p[0] = decl
        if decl != "operator":
            _parse_info.add_function(decl)
    else:
        p[0] = p[2]

def p_ctor_definition(p):
    '''ctor_definition : constructor_head function_try_block
                       | constructor_head function_body
                       | decl_specifier_prefix ctor_definition
    '''
    # Both branches of the original conditional assigned p[1], so the
    # conditional was dead code; the result is always the constructor head.
    p[0]=p[1]

def p_constructor_head(p):
    '''constructor_head : bit_field_init_declaration
                        | constructor_head ',' assignment_expression
    '''
    p[0]=p[1]

def p_function_try_block(p):
    '''function_try_block : TRY function_block handler_seq
    '''
    global noExceptionLogic
    noExceptionLogic=False
    p[0] = ['try']

def p_function_block(p):
    '''function_block : ctor_initializer_opt function_body
    '''
    pass

def p_function_body(p):
    '''function_body : LBRACE nonbrace_seq_opt RBRACE
    '''
    # Body contents are irrelevant to suite scanning; collapse to a marker.
    p[0] = ['{','}']

def p_initializer_clause(p):
    '''initializer_clause : assignment_expression
                          | braced_initializer
    '''
    pass

def p_braced_initializer(p):
    '''braced_initializer : LBRACE initializer_list RBRACE
                          | LBRACE initializer_list ',' RBRACE
                          | LBRACE RBRACE
    '''
    pass

def p_initializer_list(p):
    '''initializer_list : initializer_clause
                        | initializer_list ',' initializer_clause
    '''
    pass

#---------------------------------------------------------------------------------------------------
# A.8 Classes
#---------------------------------------------------------------------------------------------------
#
# An anonymous bit-field declaration may look very like inheritance:
#     const int B = 3;
#     class A : B ;
# The two usages are too distant to try to create and enforce a common prefix so we have to resort to
# a parser hack by backtracking. Inheritance is much the most likely so we mark the input stream context
# and try to parse a base-clause. If we successfully reach a { the base-clause is ok and inheritance was
# the correct choice so we unmark and continue. If we fail to find the { an error token causes
# back-tracking to the alternative parse in elaborated_type_specifier which regenerates the : and
# declares unconditional success.
#
def p_class_specifier_head(p):
    '''class_specifier_head : class_key scoped_id ':' base_specifier_list LBRACE
                            | class_key ':' base_specifier_list LBRACE
                            | class_key scoped_id LBRACE
                            | class_key LBRACE
    '''
    # Open a class scope, recording the class name and any base classes.
    global _parse_info
    # BUG FIX: 'scope' was previously only assigned in some branches, so the
    # anonymous-class-with-bases production (len(p) == 5) raised NameError at
    # push_scope().  Initialize both results up front.
    scope = ""
    base_classes=[]
    if len(p) == 6:
        scope = p[2]
        base_classes = p[4]
    elif len(p) == 4:
        scope = p[2]
    elif len(p) == 5:
        base_classes = p[3]
    _parse_info.push_scope(scope,p[1],base_classes)

def p_class_key(p):
    '''class_key : CLASS
                 | STRUCT
                 | UNION
    '''
    p[0] = p[1]

def p_class_specifier(p):
    '''class_specifier : class_specifier_head member_specification_opt RBRACE
    '''
    # Close the scope opened by p_class_specifier_head.
    scope = _parse_info.pop_scope()

def p_member_specification_opt(p):
    '''member_specification_opt : empty
                                | member_specification_opt member_declaration
    '''
    pass

def p_member_declaration(p):
    '''member_declaration : accessibility_specifier
                          | simple_member_declaration
                          | function_definition
                          | using_declaration
                          | template_declaration
    '''
    p[0] = get_rest(p)
    #print "Decl",get_rest(p)

#
# The generality of constructor names (there need be no parenthesised argument list) means that that
#     name : f(g), h(i)
# could be the start of a constructor or the start of an anonymous bit-field. An ambiguity is avoided by
# parsing the ctor-initializer of a function_definition as a bit-field.
#
def p_simple_member_declaration(p):
    '''simple_member_declaration : ';'
                                 | assignment_expression ';'
                                 | constructor_head ';'
                                 | member_init_declarations ';'
                                 | decl_specifier_prefix simple_member_declaration
    '''
    # Register member function declarations: "... name ( ... ) ;" flattens so
    # that the name precedes the '(' marker.
    global _parse_info
    decl = flatten(get_rest(p))
    if len(decl) >= 4 and decl[-3] == "(":
        _parse_info.add_function(decl[-4])

def p_member_init_declarations(p):
    '''member_init_declarations : assignment_expression ',' member_init_declaration
                                | constructor_head ',' bit_field_init_declaration
                                | member_init_declarations ',' member_init_declaration
    '''
    pass

def p_member_init_declaration(p):
    '''member_init_declaration : assignment_expression
                               | bit_field_init_declaration
    '''
    pass

def p_accessibility_specifier(p):
    '''accessibility_specifier : access_specifier ':'
    '''
    pass

def p_bit_field_declaration(p):
    '''bit_field_declaration : assignment_expression ':' bit_field_width
                             | ':' bit_field_width
    '''
    if len(p) == 4:
        p[0]=p[1]

def p_bit_field_width(p):
    '''bit_field_width : logical_or_expression
                       | logical_or_expression '?' bit_field_width ':' bit_field_width
    '''
    pass

def p_bit_field_init_declaration(p):
    '''bit_field_init_declaration : bit_field_declaration
                                  | bit_field_declaration '=' initializer_clause
    '''
    pass

#---------------------------------------------------------------------------------------------------
# A.9 Derived classes
#---------------------------------------------------------------------------------------------------
def p_base_specifier_list(p):
    '''base_specifier_list : base_specifier
                           | base_specifier_list ',' base_specifier
    '''
    # Accumulate base-class ids into a flat list.
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]+[p[3]]

def p_base_specifier(p):
    '''base_specifier : scoped_id
                      | access_specifier base_specifier
                      | VIRTUAL base_specifier
    '''
    # Strip access/virtual qualifiers, keeping only the base-class id.
    if len(p) == 2:
        p[0] = p[1]
    else:
        p[0] = p[2]

def p_access_specifier(p):
    '''access_specifier : PRIVATE
                        | PROTECTED
                        | PUBLIC
    '''
    pass

#---------------------------------------------------------------------------------------------------
# A.10 Special member functions
#---------------------------------------------------------------------------------------------------
def p_conversion_function_id(p):
    '''conversion_function_id : OPERATOR conversion_type_id
    '''
    p[0] = ['operator']

def p_conversion_type_id(p):
    '''conversion_type_id : type_specifier ptr_operator_seq_opt
                          | type_specifier conversion_type_id
    '''
    pass

#
# Ctor-initialisers can look like a bit field declaration, given the generalisation of names:
#     Class(Type) : m1(1), m2(2) { }
#     NonClass(bit_field) : int(2), second_variable, ...
# The grammar below is used within a function_try_block or function_definition.
# See simple_member_declaration for use in normal member function_definition.
#
def p_ctor_initializer_opt(p):
    '''ctor_initializer_opt : empty
                            | ctor_initializer
    '''
    pass

def p_ctor_initializer(p):
    '''ctor_initializer : ':' mem_initializer_list
    '''
    pass

def p_mem_initializer_list(p):
    '''mem_initializer_list : mem_initializer
                            | mem_initializer_list_head mem_initializer
    '''
    pass

def p_mem_initializer_list_head(p):
    '''mem_initializer_list_head : mem_initializer_list ','
    '''
    pass

def p_mem_initializer(p):
    '''mem_initializer : mem_initializer_id '(' expression_list_opt ')'
    '''
    pass

def p_mem_initializer_id(p):
    '''mem_initializer_id : scoped_id
    '''
    pass

#---------------------------------------------------------------------------------------------------
# A.11 Overloading
#---------------------------------------------------------------------------------------------------
def p_operator_function_id(p):
    '''operator_function_id : OPERATOR operator
                            | OPERATOR '(' ')'
                            | OPERATOR LBRACKET RBRACKET
                            | OPERATOR '<'
                            | OPERATOR '>'
                            | OPERATOR operator '<' nonlgt_seq_opt '>'
    '''
    # All operator functions are reported under the single name "operator".
    p[0] = ["operator"]

#
# It is not clear from the ANSI standard whether spaces are permitted in delete[]. If not then it can
# be recognised and returned as DELETE_ARRAY by the lexer. Assuming spaces are permitted there is an
# ambiguity created by the over generalised nature of expressions. operator new is a valid delarator-id
# which we may have an undimensioned array of. Semantic rubbish, but syntactically valid. Since the
# array form is covered by the declarator consideration we can exclude the operator here. The need
# for a semantic rescue can be eliminated at the expense of a couple of shift-reduce conflicts by
# removing the comments on the next four lines.
#
def p_operator(p):
    '''operator : NEW
                | DELETE
                | '+'
                | '-'
                | '*'
                | '/'
                | '%'
                | '^'
                | '&'
                | '|'
                | '~'
                | '!'
                | '='
                | ASS_ADD
                | ASS_SUB
                | ASS_MUL
                | ASS_DIV
                | ASS_MOD
                | ASS_XOR
                | ASS_AND
                | ASS_OR
                | SHL
                | SHR
                | ASS_SHR
                | ASS_SHL
                | EQ
                | NE
                | LE
                | GE
                | LOG_AND
                | LOG_OR
                | INC
                | DEC
                | ','
                | ARROW_STAR
                | ARROW
    '''
    p[0]=p[1]

#    | IF
#    | SWITCH
#    | WHILE
#    | FOR
#    | DO
def p_reserved(p):
    '''reserved : PRIVATE
                | CLiteral
                | CppLiteral
                | IF
                | SWITCH
                | WHILE
                | FOR
                | DO
                | PROTECTED
                | PUBLIC
                | BOOL
                | CHAR
                | DOUBLE
                | FLOAT
                | INT
                | LONG
                | SHORT
                | SIGNED
                | UNSIGNED
                | VOID
                | WCHAR_T
                | CLASS
                | ENUM
                | NAMESPACE
                | STRUCT
                | TYPENAME
                | UNION
                | CONST
                | VOLATILE
                | AUTO
                | EXPLICIT
                | EXPORT
                | EXTERN
                | FRIEND
                | INLINE
                | MUTABLE
                | REGISTER
                | STATIC
                | TEMPLATE
                | TYPEDEF
                | USING
                | VIRTUAL
                | ASM
                | BREAK
                | CASE
                | CATCH
                | CONST_CAST
                | CONTINUE
                | DEFAULT
                | DYNAMIC_CAST
                | ELSE
                | FALSE
                | GOTO
                | OPERATOR
                | REINTERPRET_CAST
                | RETURN
                | SIZEOF
                | STATIC_CAST
                | THIS
                | THROW
                | TRUE
                | TRY
                | TYPEID
                | ATTRIBUTE
                | CDECL
                | TYPEOF
                | uTYPEOF
    '''
    # Exception keywords, even when consumed as generic reserved words, imply
    # that the translation unit uses exception logic.
    if p[1] in ('try', 'catch', 'throw'):
        global noExceptionLogic
        noExceptionLogic=False

#---------------------------------------------------------------------------------------------------
# A.12 Templates
#---------------------------------------------------------------------------------------------------
def p_template_declaration(p):
    '''template_declaration : template_parameter_clause declaration
                            | EXPORT template_declaration
    '''
    pass

def p_template_parameter_clause(p):
    '''template_parameter_clause : TEMPLATE '<' nonlgt_seq_opt '>'
    '''
    pass

#
# Generalised naming makes identifier a valid declaration, so TEMPLATE identifier is too.
# The TEMPLATE prefix is therefore folded into all names, parenthesis_clause and decl_specifier_prefix.
# # explicit_instantiation: TEMPLATE declaration # def p_explicit_specialization(p): '''explicit_specialization : TEMPLATE '<' '>' declaration ''' pass #--------------------------------------------------------------------------------------------------- # A.13 Exception Handling #--------------------------------------------------------------------------------------------------- def p_handler_seq(p): '''handler_seq : handler | handler handler_seq ''' pass def p_handler(p): '''handler : CATCH '(' exception_declaration ')' compound_statement ''' global noExceptionLogic noExceptionLogic=False def p_exception_declaration(p): '''exception_declaration : parameter_declaration ''' pass def p_throw_expression(p): '''throw_expression : THROW | THROW assignment_expression ''' global noExceptionLogic noExceptionLogic=False def p_exception_specification(p): '''exception_specification : THROW '(' ')' | THROW '(' type_id_list ')' ''' global noExceptionLogic noExceptionLogic=False def p_type_id_list(p): '''type_id_list : type_id | type_id_list ',' type_id ''' pass #--------------------------------------------------------------------------------------------------- # Misc productions #--------------------------------------------------------------------------------------------------- def p_nonsemicolon_seq(p): '''nonsemicolon_seq : empty | nonsemicolon_seq nonsemicolon ''' pass def p_nonsemicolon(p): '''nonsemicolon : misc | '(' | ')' | '<' | '>' | LBRACKET nonbracket_seq_opt RBRACKET | LBRACE nonbrace_seq_opt RBRACE ''' pass def p_nonparen_seq_opt(p): '''nonparen_seq_opt : empty | nonparen_seq_opt nonparen ''' pass def p_nonparen_seq(p): '''nonparen_seq : nonparen | nonparen_seq nonparen ''' pass def p_nonparen(p): '''nonparen : misc | '<' | '>' | ';' | LBRACKET nonbracket_seq_opt RBRACKET | LBRACE nonbrace_seq_opt RBRACE ''' pass def p_nonbracket_seq_opt(p): '''nonbracket_seq_opt : empty | nonbracket_seq_opt nonbracket ''' pass def p_nonbracket_seq(p): '''nonbracket_seq : nonbracket 
| nonbracket_seq nonbracket ''' pass def p_nonbracket(p): '''nonbracket : misc | '<' | '>' | '(' | ')' | ';' | LBRACKET nonbracket_seq_opt RBRACKET | LBRACE nonbrace_seq_opt RBRACE ''' pass def p_nonbrace_seq_opt(p): '''nonbrace_seq_opt : empty | nonbrace_seq_opt nonbrace ''' pass def p_nonbrace(p): '''nonbrace : misc | '<' | '>' | '(' | ')' | ';' | LBRACKET nonbracket_seq_opt RBRACKET | LBRACE nonbrace_seq_opt RBRACE ''' pass def p_nonlgt_seq_opt(p): '''nonlgt_seq_opt : empty | nonlgt_seq_opt nonlgt ''' pass def p_nonlgt(p): '''nonlgt : misc | '(' | ')' | LBRACKET nonbracket_seq_opt RBRACKET | '<' nonlgt_seq_opt '>' | ';' ''' pass def p_misc(p): '''misc : operator | identifier | IntegerLiteral | CharacterLiteral | FloatingLiteral | StringLiteral | reserved | '?' | ':' | '.' | SCOPE | ELLIPSIS | EXTENSION ''' pass def p_empty(p): '''empty : ''' pass # # Compute column. # input is the input text string # token is a token instance # def _find_column(input,token): ''' TODO ''' i = token.lexpos while i > 0: if input[i] == '\n': break i -= 1 column = (token.lexpos - i)+1 return column def p_error(p): if p is None: tmp = "Syntax error at end of file." else: tmp = "Syntax error at token " if p.type is "": tmp = tmp + "''" else: tmp = tmp + str(p.type) tmp = tmp + " with value '"+str(p.value)+"'" tmp = tmp + " in line " + str(lexer.lineno-1) tmp = tmp + " at column "+str(_find_column(_parsedata,p)) raise IOError( tmp ) # # The function that performs the parsing # def parse_cpp(data=None, filename=None, debug=0, optimize=0, verbose=False, func_filter=None): if debug > 0: print "Debugging parse_cpp!" # # Always remove the parser.out file, which is generated to create debugging # if os.path.exists("parser.out"): os.remove("parser.out") # # Remove the parsetab.py* files. These apparently need to be removed # to ensure the creation of a parser.out file. 
# if os.path.exists("parsetab.py"): os.remove("parsetab.py") if os.path.exists("parsetab.pyc"): os.remove("parsetab.pyc") global debugging debugging=True # # Build lexer # global lexer lexer = lex.lex() # # Initialize parse object # global _parse_info _parse_info = CppInfo(filter=func_filter) _parse_info.verbose=verbose # # Build yaccer # write_table = not os.path.exists("parsetab.py") yacc.yacc(debug=debug, optimize=optimize, write_tables=write_table) # # Parse the file # global _parsedata if not data is None: _parsedata=data ply_init(_parsedata) yacc.parse(data,debug=debug) elif not filename is None: f = open(filename) data = f.read() f.close() _parsedata=data ply_init(_parsedata) yacc.parse(data, debug=debug) else: return None # if not noExceptionLogic: _parse_info.noExceptionLogic = False else: for key in identifier_lineno: if 'ASSERT_THROWS' in key: _parse_info.noExceptionLogic = False break _parse_info.noExceptionLogic = True # return _parse_info import sys if __name__ == '__main__': # # This MAIN routine parses a sequence of files provided at the command # line. If '-v' is included, then a verbose parsing output is # generated. # for arg in sys.argv[1:]: if arg == "-v": continue print "Parsing file '"+arg+"'" if '-v' in sys.argv: parse_cpp(filename=arg,debug=2,verbose=2) else: parse_cpp(filename=arg,verbose=2) # # Print the _parse_info object summary for this file. # This illustrates how class inheritance can be used to # deduce class members. # print str(_parse_info) ================================================ FILE: cxxtest/cxxtest/cxxtest_fog.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. 
# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- # # TODO: add line number info # TODO: add test function names # from __future__ import division import sys import re #from os.path import abspath, dirname #sys.path.insert(0, dirname(dirname(abspath(__file__)))) #sys.path.insert(0, dirname(dirname(abspath(__file__)))+"/cxx_parse") from cxxtest_misc import abort import cxx_parser import re def cstr( str ): '''Convert a string to its C representation''' return '"' + re.sub('\\\\', '\\\\\\\\', str ) + '"' def scanInputFiles(files, _options): '''Scan all input files for test suites''' suites=[] for file in files: try: print "Parsing file "+file, sys.stdout.flush() parse_info = cxx_parser.parse_cpp(filename=file,optimize=1) except IOError, err: print " error." print str(err) continue print "done." sys.stdout.flush() # # WEH: see if it really makes sense to use parse information to # initialize this data. I don't think so... 
# _options.haveStandardLibrary=1 if not parse_info.noExceptionLogic: _options.haveExceptionHandling=1 # keys = list(parse_info.index.keys()) tpat = re.compile("[Tt][Ee][Ss][Tt]") for key in keys: if parse_info.index[key].scope_t == "class" and parse_info.is_baseclass(key,"CxxTest::TestSuite"): name=parse_info.index[key].name suite = { 'name' : name, 'file' : file, 'cfile' : cstr(file), 'line' : str(parse_info.index[key].lineno), 'generated' : 0, 'object' : 'suite_%s' % name, 'dobject' : 'suiteDescription_%s' % name, 'tlist' : 'Tests_%s' % name, 'tests' : [], 'lines' : [] } for fn in parse_info.get_functions(key,quiet=True): tname = fn[0] lineno = str(fn[1]) if tname.startswith('createSuite'): # Indicate that we're using a dynamically generated test suite suite['create'] = str(lineno) # (unknown line) if tname.startswith('destroySuite'): # Indicate that we're using a dynamically generated test suite suite['destroy'] = str(lineno) # (unknown line) if not tpat.match(tname): # Skip non-test methods continue test = { 'name' : tname, 'suite' : suite, 'class' : 'TestDescription_suite_%s_%s' % (suite['name'], tname), 'object' : 'testDescription_suite_%s_%s' % (suite['name'], tname), 'line' : lineno, } suite['tests'].append(test) suites.append(suite) if not _options.root: ntests = 0 for suite in suites: ntests += len(suite['tests']) if ntests == 0: abort( 'No tests defined' ) # return [_options, suites] ================================================ FILE: cxxtest/cxxtest/cxxtest_misc.py ================================================ #!/usr/bin/python #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. 
Government retains certain rights in this software. #------------------------------------------------------------------------- import sys def abort( problem ): '''Print error message and exit''' sys.stderr.write( '\n' ) sys.stderr.write( problem ) sys.stderr.write( '\n\n' ) sys.exit(2) ================================================ FILE: cxxtest/cxxtest/cxxtest_parser.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- from __future__ import division import codecs import re #import sys #import getopt #import glob from cxxtest.cxxtest_misc import abort # Global variables suites = [] suite = None inBlock = 0 options=None def scanInputFiles(files, _options): '''Scan all input files for test suites''' global options options=_options for file in files: scanInputFile(file) global suites if len(suites) is 0 and not options.root: abort( 'No tests defined' ) return [options,suites] lineCont_re = re.compile('(.*)\\\s*$') def scanInputFile(fileName): '''Scan single input file for test suites''' # mode 'rb' is problematic in python3 - byte arrays don't behave the same as # strings. # As far as the choice of the default encoding: utf-8 chews through # everything that the previous ascii codec could, plus most of new code. # TODO: figure out how to do this properly - like autodetect encoding from # file header. 
file = codecs.open(fileName, mode='r', encoding='utf-8') prev = "" lineNo = 0 contNo = 0 while 1: line = file.readline() if not line: break lineNo += 1 m = lineCont_re.match(line) if m: prev += m.group(1) + " " contNo += 1 else: scanInputLine( fileName, lineNo - contNo, prev + line ) contNo = 0 prev = "" if contNo: scanInputLine( fileName, lineNo - contNo, prev + line ) closeSuite() file.close() def scanInputLine( fileName, lineNo, line ): '''Scan single input line for interesting stuff''' scanLineForExceptionHandling( line ) scanLineForStandardLibrary( line ) scanLineForSuiteStart( fileName, lineNo, line ) global suite if suite: scanLineInsideSuite( suite, lineNo, line ) def scanLineInsideSuite( suite, lineNo, line ): '''Analyze line which is part of a suite''' global inBlock if lineBelongsToSuite( suite, lineNo, line ): scanLineForTest( suite, lineNo, line ) scanLineForCreate( suite, lineNo, line ) scanLineForDestroy( suite, lineNo, line ) def lineBelongsToSuite( suite, lineNo, line ): '''Returns whether current line is part of the current suite. 
This can be false when we are in a generated suite outside of CXXTEST_CODE() blocks If the suite is generated, adds the line to the list of lines''' if not suite['generated']: return 1 global inBlock if not inBlock: inBlock = lineStartsBlock( line ) if inBlock: inBlock = addLineToBlock( suite, lineNo, line ) return inBlock std_re = re.compile( r"\b(std\s*::|CXXTEST_STD|using\s+namespace\s+std\b|^\s*\#\s*include\s+<[a-z0-9]+>)" ) def scanLineForStandardLibrary( line ): '''Check if current line uses standard library''' global options if not options.haveStandardLibrary and std_re.search(line): if not options.noStandardLibrary: options.haveStandardLibrary = 1 exception_re = re.compile( r"\b(throw|try|catch|TSM?_ASSERT_THROWS[A-Z_]*)\b" ) def scanLineForExceptionHandling( line ): '''Check if current line uses exception handling''' global options if not options.haveExceptionHandling and exception_re.search(line): if not options.noExceptionHandling: options.haveExceptionHandling = 1 classdef = '(?:::\s*)?(?:\w+\s*::\s*)*\w+' baseclassdef = '(?:public|private|protected)\s+%s' % (classdef,) general_suite = r"\bclass\s+(%s)\s*:(?:\s*%s\s*,)*\s*public\s+" \ % (classdef, baseclassdef,) testsuite = '(?:(?:::)?\s*CxxTest\s*::\s*)?TestSuite' suites_re = { re.compile( general_suite + testsuite ) : None } generatedSuite_re = re.compile( r'\bCXXTEST_SUITE\s*\(\s*(\w*)\s*\)' ) def scanLineForSuiteStart( fileName, lineNo, line ): '''Check if current line starts a new test suite''' for i in list(suites_re.items()): m = i[0].search( line ) if m: suite = startSuite( m.group(1), fileName, lineNo, 0 ) if i[1] is not None: for test in i[1]['tests']: addTest(suite, test['name'], test['line']) break m = generatedSuite_re.search( line ) if m: sys.stdout.write( "%s:%s: Warning: Inline test suites are deprecated.\n" % (fileName, lineNo) ) startSuite( m.group(1), fileName, lineNo, 1 ) def startSuite( name, file, line, generated ): '''Start scanning a new suite''' global suite closeSuite() 
object_name = name.replace(':',"_") suite = { 'name' : name, 'file' : file, 'cfile' : cstr(file), 'line' : line, 'generated' : generated, 'object' : 'suite_%s' % object_name, 'dobject' : 'suiteDescription_%s' % object_name, 'tlist' : 'Tests_%s' % object_name, 'tests' : [], 'lines' : [] } suites_re[re.compile( general_suite + name )] = suite return suite def lineStartsBlock( line ): '''Check if current line starts a new CXXTEST_CODE() block''' return re.search( r'\bCXXTEST_CODE\s*\(', line ) is not None test_re = re.compile( r'^([^/]|/[^/])*\bvoid\s+([Tt]est\w+)\s*\(\s*(void)?\s*\)' ) def scanLineForTest( suite, lineNo, line ): '''Check if current line starts a test''' m = test_re.search( line ) if m: addTest( suite, m.group(2), lineNo ) def addTest( suite, name, line ): '''Add a test function to the current suite''' test = { 'name' : name, 'suite' : suite, 'class' : 'TestDescription_%s_%s' % (suite['object'], name), 'object' : 'testDescription_%s_%s' % (suite['object'], name), 'line' : line, } suite['tests'].append( test ) def addLineToBlock( suite, lineNo, line ): '''Append the line to the current CXXTEST_CODE() block''' line = fixBlockLine( suite, lineNo, line ) line = re.sub( r'^.*\{\{', '', line ) e = re.search( r'\}\}', line ) if e: line = line[:e.start()] suite['lines'].append( line ) return e is None def fixBlockLine( suite, lineNo, line): '''Change all [E]TS_ macros used in a line to _[E]TS_ macros with the correct file/line''' return re.sub( r'\b(E?TSM?_(ASSERT[A-Z_]*|FAIL))\s*\(', r'_\1(%s,%s,' % (suite['cfile'], lineNo), line, 0 ) create_re = re.compile( r'\bstatic\s+\w+\s*\*\s*createSuite\s*\(\s*(void)?\s*\)' ) def scanLineForCreate( suite, lineNo, line ): '''Check if current line defines a createSuite() function''' if create_re.search( line ): addSuiteCreateDestroy( suite, 'create', lineNo ) destroy_re = re.compile( r'\bstatic\s+void\s+destroySuite\s*\(\s*\w+\s*\*\s*\w*\s*\)' ) def scanLineForDestroy( suite, lineNo, line ): '''Check if current line 
defines a destroySuite() function''' if destroy_re.search( line ): addSuiteCreateDestroy( suite, 'destroy', lineNo ) def cstr( s ): '''Convert a string to its C representation''' return '"' + s.replace( '\\', '\\\\' ) + '"' def addSuiteCreateDestroy( suite, which, line ): '''Add createSuite()/destroySuite() to current suite''' if which in suite: abort( '%s:%s: %sSuite() already declared' % ( suite['file'], str(line), which ) ) suite[which] = line def closeSuite(): '''Close current suite and add it to the list if valid''' global suite if suite is not None: if len(suite['tests']) is not 0: verifySuite(suite) rememberSuite(suite) suite = None def verifySuite(suite): '''Verify current suite is legal''' if 'create' in suite and 'destroy' not in suite: abort( '%s:%s: Suite %s has createSuite() but no destroySuite()' % (suite['file'], suite['create'], suite['name']) ) elif 'destroy' in suite and 'create' not in suite: abort( '%s:%s: Suite %s has destroySuite() but no createSuite()' % (suite['file'], suite['destroy'], suite['name']) ) def rememberSuite(suite): '''Add current suite to list''' global suites suites.append( suite ) ================================================ FILE: cxxtest/cxxtest/cxxtestgen.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- # vim: fileencoding=utf-8 from __future__ import division # the above import important for forward-compatibility with python3, # which is already the default in archlinux! 
__all__ = ['main']

import __release__
import os
import sys
import re
import glob
from optparse import OptionParser
import cxxtest_parser
try:
    import cxxtest_fog
    imported_fog=True
except ImportError:
    imported_fog=False

from cxxtest_misc import abort

# Module-level state shared between parsing and output generation.
options = []
suites = []

wrotePreamble = 0
wroteWorld = 0
lastIncluded = ''


def main(args=sys.argv):
    '''The main program'''
    #
    # Reset global state
    #
    global wrotePreamble
    wrotePreamble=0
    global wroteWorld
    wroteWorld=0
    global lastIncluded
    lastIncluded = ''

    global suites
    global options
    files = parseCommandline(args)
    if imported_fog and options.fog:
        [options,suites] = cxxtest_fog.scanInputFiles( files, options )
    else:
        [options,suites] = cxxtest_parser.scanInputFiles( files, options )
    writeOutput()

def parseCommandline(args):
    '''Analyze command line arguments'''
    global imported_fog
    global options
    parser = OptionParser("%prog [options] [<filename> ...]")
    parser.add_option("--version",
                      action="store_true", dest="version", default=False,
                      help="Write the CxxTest version.")
    parser.add_option("-o", "--output",
                      dest="outputFileName", default=None, metavar="NAME",
                      help="Write output to file NAME.")
    parser.add_option("-w","--world", dest="world", default="cxxtest",
                      help="The label of the tests, used to name the XML results.")
    parser.add_option("", "--include", action="append",
                      dest="headers", default=[], metavar="HEADER",
                      help="Include file HEADER in the test runner before other headers.")
    parser.add_option("", "--abort-on-fail",
                      action="store_true", dest="abortOnFail", default=False,
                      help="Abort tests on failed asserts (like xUnit).")
    parser.add_option("", "--main",
                      action="store", dest="main", default="main",
                      help="Specify an alternative name for the main() function.")
    parser.add_option("", "--headers",
                      action="store", dest="header_filename", default=None,
                      help="Specify a filename that contains a list of header files that are processed to generate a test runner.")
    parser.add_option("", "--runner",
                      dest="runner", default="", metavar="CLASS",
                      help="Create a test runner that processes test events using the class CxxTest::CLASS.")
    parser.add_option("", "--gui",
                      dest="gui", metavar="CLASS",
                      help="Create a GUI test runner that processes test events using the class CxxTest::CLASS. (deprecated)")
    parser.add_option("", "--error-printer",
                      action="store_true", dest="error_printer", default=False,
                      help="Create a test runner using the ErrorPrinter class, and allow the use of the standard library.")
    parser.add_option("", "--xunit-printer",
                      action="store_true", dest="xunit_printer", default=False,
                      help="Create a test runner using the XUnitPrinter class.")
    parser.add_option("", "--xunit-file", dest="xunit_file", default="",
                      help="The file to which the XML summary is written for test runners using the XUnitPrinter class. The default XML filename is TEST-<world>.xml, where <world> is the value of the --world option. (default: cxxtest)")
    parser.add_option("", "--have-std",
                      action="store_true", dest="haveStandardLibrary", default=False,
                      help="Use the standard library (even if not found in tests).")
    parser.add_option("", "--no-std",
                      action="store_true", dest="noStandardLibrary", default=False,
                      help="Do not use standard library (even if found in tests).")
    parser.add_option("", "--have-eh",
                      action="store_true", dest="haveExceptionHandling", default=False,
                      help="Use exception handling (even if not found in tests).")
    parser.add_option("", "--no-eh",
                      action="store_true", dest="noExceptionHandling", default=False,
                      help="Do not use exception handling (even if found in tests).")
    parser.add_option("", "--longlong",
                      dest="longlong", default=None, metavar="TYPE",
                      help="Use TYPE as for long long integers. (default: not supported)")
    parser.add_option("", "--no-static-init",
                      action="store_true", dest="noStaticInit", default=False,
                      help="Do not rely on static initialization in the test runner.")
    parser.add_option("", "--template",
                      dest="templateFileName", default=None, metavar="TEMPLATE",
                      help="Generate the test runner using file TEMPLATE to define a template.")
    parser.add_option("", "--root",
                      action="store_true", dest="root", default=False,
                      help="Write the main() function and global data for a test runner.")
    parser.add_option("", "--part",
                      action="store_true", dest="part", default=False,
                      help="Write the tester classes for a test runner.")
    #parser.add_option("", "--factor",
    #action="store_true", dest="factor", default=False,
    #help="Declare the _CXXTEST_FACTOR macro.  (deprecated)")
    if imported_fog:
        fog_help = "Use new FOG C++ parser"
    else:
        fog_help = "Use new FOG C++ parser (disabled)"
    parser.add_option("-f", "--fog-parser",
                      action="store_true", dest="fog", default=False,
                      help=fog_help )

    (options, args) = parser.parse_args(args=args)

    # IDIOM FIX: "not x is None" -> "x is not None".
    if options.header_filename is not None:
        if not os.path.exists(options.header_filename):
            abort( "ERROR: the file '%s' does not exist!" % options.header_filename )
        # ROBUSTNESS FIX: use a context manager so the file is closed even if
        # reading raises (original opened/closed manually).
        with open(options.header_filename) as INPUT:
            headers = [line.strip() for line in INPUT]
        args.extend( headers )

    if options.fog and not imported_fog:
        abort( "Cannot use the FOG parser. Check that the 'ply' package is installed. The 'ordereddict' package is also required if running Python 2.6")

    if options.version:
        printVersion()

    # the cxxtest builder relies on this behaviour! don't remove
    if options.runner == 'none':
        options.runner = None

    if options.xunit_printer or options.runner == "XUnitPrinter":
        options.xunit_printer=True
        options.runner="XUnitPrinter"
        # BUG FIX (simplification): the original duplicated this default-name
        # logic verbatim under "if len(args) > 1:" and an identical "elif";
        # the two branches collapse to a single condition with no behavior
        # change.
        if options.xunit_file == "":
            if options.world == "":
                options.world = "cxxtest"
            options.xunit_file="TEST-"+options.world+".xml"

    if options.error_printer:
        options.runner= "ErrorPrinter"
        options.haveStandardLibrary = True

    if options.noStaticInit and (options.root or options.part):
        abort( '--no-static-init cannot be used with --root/--part' )

    if options.gui and not options.runner:
        options.runner = 'StdioPrinter'

    files = setFiles(args[1:])
    if len(files) == 0 and not options.root:
        # BUG FIX: parser.error() prints the message to stderr and exits the
        # process; it never returns a value.  The original wrapped it in
        # sys.stderr.write(parser.error(...)), whose outer call was dead code
        # (and would have raised TypeError on None had error() returned).
        parser.error("No input files found")

    return files

def printVersion():
    '''Print CxxTest version and exit'''
    sys.stdout.write( "This is CxxTest version %s.\n" % __release__.__version__ )
    sys.exit(0)

def setFiles(patterns ):
    '''Set input files specified on command line'''
    files = expandWildcards( patterns )
    return files

def expandWildcards( patterns ):
    '''Expand all wildcards in an array (glob)'''
    fileNames = []
    for pathName in patterns:
        patternFiles = glob.glob( pathName )
        for fileName in patternFiles:
            fileNames.append( fixBackslashes( fileName ) )
    return fileNames

def fixBackslashes( fileName ):
    '''Convert backslashes to slashes in file name'''
    return re.sub( r'\\', '/', fileName, 0 )

def writeOutput():
    '''Create output file'''
    if options.templateFileName:
        writeTemplateOutput()
    else:
        writeSimpleOutput()

def writeSimpleOutput():
    '''Create output not based on template'''
    output = startOutputFile()
    writePreamble( output )
    if options.root or not options.part:
        writeMain( output )
        # NOTE(review): nesting reconstructed from the flattened source; the
        # _init flag emission accompanies writeMain(), matching the template
        # path in writeTemplateOutput() — confirm against upstream.
        if len(suites) > 0:
            output.write("bool "+suites[0]['object']+"_init = false;\n")
    writeWorld( output )
    output.close()

include_re = re.compile( r"\s*\#\s*include\s+<cxxtest/" )
preamble_re = re.compile( r"^\s*<CxxTest\s+preamble>\s*$" ) world_re = re.compile( r"^\s*<CxxTest\s+world>\s*$" ) def writeTemplateOutput(): '''Create output based on template file''' template = open(options.templateFileName) output = startOutputFile() while 1: line = template.readline() if not line: break; if include_re.search( line ): writePreamble( output ) output.write( line ) elif preamble_re.search( line ): writePreamble( output ) elif world_re.search( line ): if len(suites) > 0: output.write("bool "+suites[0]['object']+"_init = false;\n") writeWorld( output ) else: output.write( line ) template.close() output.close() def startOutputFile(): '''Create output file and write header''' if options.outputFileName is not None: output = open( options.outputFileName, 'w' ) else: output = sys.stdout output.write( "/* Generated file, do not edit */\n\n" ) return output def writePreamble( output ): '''Write the CxxTest header (#includes and #defines)''' global wrotePreamble if wrotePreamble: return output.write( "#ifndef CXXTEST_RUNNING\n" ) output.write( "#define CXXTEST_RUNNING\n" ) output.write( "#endif\n" ) output.write( "\n" ) if options.xunit_printer: output.write( "#include <fstream>\n" ) if options.haveStandardLibrary: output.write( "#define _CXXTEST_HAVE_STD\n" ) if options.haveExceptionHandling: output.write( "#define _CXXTEST_HAVE_EH\n" ) if options.abortOnFail: output.write( "#define _CXXTEST_ABORT_TEST_ON_FAIL\n" ) if options.longlong: output.write( "#define _CXXTEST_LONGLONG %s\n" % options.longlong ) #if options.factor: #output.write( "#define _CXXTEST_FACTOR\n" ) for header in options.headers: output.write( "#include \"%s\"\n" % header ) output.write( "#include <cxxtest/TestListener.h>\n" ) output.write( "#include <cxxtest/TestTracker.h>\n" ) output.write( "#include <cxxtest/TestRunner.h>\n" ) output.write( "#include <cxxtest/RealDescriptions.h>\n" ) output.write( "#include <cxxtest/TestMain.h>\n" ) if options.runner: output.write( "#include 
<cxxtest/%s.h>\n" % options.runner ) if options.gui: output.write( "#include <cxxtest/%s.h>\n" % options.gui ) output.write( "\n" ) wrotePreamble = 1 def writeMain( output ): '''Write the main() function for the test runner''' if not (options.gui or options.runner): return output.write( 'int %s( int argc, char *argv[] ) {\n' % options.main ) output.write( ' int status;\n' ) if options.noStaticInit: output.write( ' CxxTest::initialize();\n' ) if options.gui: tester_t = "CxxTest::GuiTuiRunner<CxxTest::%s, CxxTest::%s> " % (options.gui, options.runner) else: tester_t = "CxxTest::%s" % (options.runner) if options.xunit_printer: output.write( ' std::ofstream ofstr("%s");\n' % options.xunit_file ) output.write( ' %s tmp(ofstr);\n' % tester_t ) output.write( ' CxxTest::RealWorldDescription::_worldName = "%s";\n' % options.world ) else: output.write( ' %s tmp;\n' % tester_t ) output.write( ' status = CxxTest::Main<%s>( tmp, argc, argv );\n' % tester_t ) output.write( ' return status;\n') output.write( '}\n' ) def writeWorld( output ): '''Write the world definitions''' global wroteWorld if wroteWorld: return writePreamble( output ) writeSuites( output ) if options.root or not options.part: writeRoot( output ) writeWorldDescr( output ) if options.noStaticInit: writeInitialize( output ) wroteWorld = 1 def writeSuites(output): '''Write all TestDescriptions and SuiteDescriptions''' for suite in suites: writeInclude( output, suite['file'] ) if isGenerated(suite): generateSuite( output, suite ) if isDynamic(suite): writeSuitePointer( output, suite ) else: writeSuiteObject( output, suite ) writeTestList( output, suite ) writeSuiteDescription( output, suite ) writeTestDescriptions( output, suite ) def isGenerated(suite): '''Checks whether a suite class should be created''' return suite['generated'] def isDynamic(suite): '''Checks whether a suite is dynamic''' return 'create' in suite def writeInclude(output, file): '''Add #include "file" statement''' global lastIncluded if file == 
lastIncluded: return output.writelines( [ '#include "', file, '"\n\n' ] ) lastIncluded = file def generateSuite( output, suite ): '''Write a suite declared with CXXTEST_SUITE()''' output.write( 'class %s : public CxxTest::TestSuite {\n' % suite['name'] ) output.write( 'public:\n' ) for line in suite['lines']: output.write(line) output.write( '};\n\n' ) def writeSuitePointer( output, suite ): '''Create static suite pointer object for dynamic suites''' if options.noStaticInit: output.write( 'static %s *%s;\n\n' % (suite['name'], suite['object']) ) else: output.write( 'static %s *%s = 0;\n\n' % (suite['name'], suite['object']) ) def writeSuiteObject( output, suite ): '''Create static suite object for non-dynamic suites''' output.writelines( [ "static ", suite['name'], " ", suite['object'], ";\n\n" ] ) def writeTestList( output, suite ): '''Write the head of the test linked list for a suite''' if options.noStaticInit: output.write( 'static CxxTest::List %s;\n' % suite['tlist'] ) else: output.write( 'static CxxTest::List %s = { 0, 0 };\n' % suite['tlist'] ) def writeWorldDescr( output ): '''Write the static name of the world name''' if options.noStaticInit: output.write( 'const char* CxxTest::RealWorldDescription::_worldName;\n' ) else: output.write( 'const char* CxxTest::RealWorldDescription::_worldName = "cxxtest";\n' ) def writeTestDescriptions( output, suite ): '''Write all test descriptions for a suite''' for test in suite['tests']: writeTestDescription( output, suite, test ) def writeTestDescription( output, suite, test ): '''Write test description object''' output.write( 'static class %s : public CxxTest::RealTestDescription {\n' % test['class'] ) output.write( 'public:\n' ) if not options.noStaticInit: output.write( ' %s() : CxxTest::RealTestDescription( %s, %s, %s, "%s" ) {}\n' % (test['class'], suite['tlist'], suite['dobject'], test['line'], test['name']) ) output.write( ' void runTest() { %s }\n' % runBody( suite, test ) ) output.write( '} %s;\n\n' % 
test['object'] ) def runBody( suite, test ): '''Body of TestDescription::run()''' if isDynamic(suite): return dynamicRun( suite, test ) else: return staticRun( suite, test ) def dynamicRun( suite, test ): '''Body of TestDescription::run() for test in a dynamic suite''' return 'if ( ' + suite['object'] + ' ) ' + suite['object'] + '->' + test['name'] + '();' def staticRun( suite, test ): '''Body of TestDescription::run() for test in a non-dynamic suite''' return suite['object'] + '.' + test['name'] + '();' def writeSuiteDescription( output, suite ): '''Write SuiteDescription object''' if isDynamic( suite ): writeDynamicDescription( output, suite ) else: writeStaticDescription( output, suite ) def writeDynamicDescription( output, suite ): '''Write SuiteDescription for a dynamic suite''' output.write( 'CxxTest::DynamicSuiteDescription<%s> %s' % (suite['name'], suite['dobject']) ) if not options.noStaticInit: output.write( '( %s, %s, "%s", %s, %s, %s, %s )' % (suite['cfile'], suite['line'], suite['name'], suite['tlist'], suite['object'], suite['create'], suite['destroy']) ) output.write( ';\n\n' ) def writeStaticDescription( output, suite ): '''Write SuiteDescription for a static suite''' output.write( 'CxxTest::StaticSuiteDescription %s' % suite['dobject'] ) if not options.noStaticInit: output.write( '( %s, %s, "%s", %s, %s )' % (suite['cfile'], suite['line'], suite['name'], suite['object'], suite['tlist']) ) output.write( ';\n\n' ) def writeRoot(output): '''Write static members of CxxTest classes''' output.write( '#include <cxxtest/Root.cpp>\n' ) def writeInitialize(output): '''Write CxxTest::initialize(), which replaces static initialization''' output.write( 'namespace CxxTest {\n' ) output.write( ' void initialize()\n' ) output.write( ' {\n' ) for suite in suites: output.write( ' %s.initialize();\n' % suite['tlist'] ) if isDynamic(suite): output.write( ' %s = 0;\n' % suite['object'] ) output.write( ' %s.initialize( %s, %s, "%s", %s, %s, %s, %s );\n' % 
(suite['dobject'], suite['cfile'], suite['line'], suite['name'], suite['tlist'], suite['object'], suite['create'], suite['destroy']) ) else: output.write( ' %s.initialize( %s, %s, "%s", %s, %s );\n' % (suite['dobject'], suite['cfile'], suite['line'], suite['name'], suite['object'], suite['tlist']) ) for test in suite['tests']: output.write( ' %s.initialize( %s, %s, %s, "%s" );\n' % (test['object'], suite['tlist'], suite['dobject'], test['line'], test['name']) ) output.write( ' }\n' ) output.write( '}\n' ) ================================================ FILE: cxxtest/cxxtestgen ================================================ #! /usr/bin/env python # # The CxxTest driver script, which uses the cxxtest Python package. # import sys import os from os.path import realpath, dirname if sys.version_info < (3,0): sys.path.insert(0, dirname(dirname(realpath(__file__)))+os.sep+'python') else: sys.path.insert(0, dirname(dirname(realpath(__file__)))+os.sep+'python'+os.sep+'python3') sys.path.append(".") import cxxtest cxxtest.main(sys.argv) ================================================ FILE: cxxtest/python/README.txt ================================================ CxxTest Python Package ====================== The CxxTest Python package includes utilities that are used by the CxxTest unit testing framework. Specifically, this Python package supports C++ parsing and code generation done in the cxxtestgen script. ================================================ FILE: cxxtest/python/convert.py ================================================ # # Execute this script to copy the cxxtest/*.py files # and run 2to3 to convert them to Python 3. 
# import glob import subprocess import os import shutil os.chdir('cxxtest') for file in glob.glob('*.py'): shutil.copyfile(file, '../python3/cxxtest/'+file) # os.chdir('../python3/cxxtest') # for file in glob.glob('*.py'): subprocess.call('2to3 -w '+file, shell=True) ================================================ FILE: cxxtest/python/cxxtest/__init__.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- """cxxtest: A Python package that supports the CxxTest test framework for C/C++. .. _CxxTest: http://cxxtest.tigris.org/ CxxTest is a unit testing framework for C++ that is similar in spirit to JUnit, CppUnit, and xUnit. CxxTest is easy to use because it does not require precompiling a CxxTest testing library, it employs no advanced features of C++ (e.g. RTTI) and it supports a very flexible form of test discovery. The cxxtest Python package includes capabilities for parsing C/C++ source files and generating CxxTest drivers. """ from cxxtest.__release__ import __version__, __date__ __date__ __version__ __maintainer__ = "William E. Hart" __maintainer_email__ = "whart222@gmail.com" __license__ = "LGPL" __url__ = "http://cxxtest.tigris.org/" from cxxtest.cxxtestgen import * ================================================ FILE: cxxtest/python/cxxtest/__release__.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. 
# Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- """ Release Information for cxxtest """ __version__ = '4.0.2' __date__ = "2012-01-02" ================================================ FILE: cxxtest/python/cxxtest/cxx_parser.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- # vim: fileencoding=utf-8 # # This is a PLY parser for the entire ANSI C++ grammar. This grammar was # adapted from the FOG grammar developed by E. D. Willink. See # # http://www.computing.surrey.ac.uk/research/dsrg/fog/ # # for further details. # # The goal of this grammar is to extract information about class, function and # class method declarations, along with their associated scope. Thus, this # grammar can be used to analyze classes in an inheritance heirarchy, and then # enumerate the methods in a derived class. # # This grammar parses blocks of <>, (), [] and {} in a generic manner. Thus, # There are several capabilities that this grammar does not support: # # 1. Ambiguous template specification. This grammar cannot parse template # specifications that do not have paired <>'s in their declaration. 
In # particular, ambiguous declarations like # # foo<A, c<3 >(); # # cannot be correctly parsed. # # 2. Template class specialization. Although the goal of this grammar is to # extract class information, specialization of templated classes is # not supported. When a template class definition is parsed, it's # declaration is archived without information about the template # parameters. Class specializations will be stored separately, and # thus they can be processed after the fact. However, this grammar # does not attempt to correctly process properties of class inheritence # when template class specialization is employed. # # # TODO: document usage of this file # from __future__ import division import os import ply.lex as lex import ply.yacc as yacc import re try: from collections import OrderedDict except ImportError: from ordereddict import OrderedDict lexer = None scope_lineno = 0 identifier_lineno = {} _parse_info=None _parsedata=None noExceptionLogic = True def ply_init(data): global _parsedata _parsedata=data class Scope(object): def __init__(self,name,abs_name,scope_t,base_classes,lineno): self.function=[] self.name=name self.scope_t=scope_t self.sub_scopes=[] self.base_classes=base_classes self.abs_name=abs_name self.lineno=lineno def insert(self,scope): self.sub_scopes.append(scope) class CppInfo(object): def __init__(self, filter=None): self.verbose=0 if filter is None: self.filter=re.compile("[Tt][Ee][Ss][Tt]|createSuite|destroySuite") else: self.filter=filter self.scopes=[""] self.index=OrderedDict() self.index[""]=Scope("","::","namespace",[],1) self.function=[] def push_scope(self,ns,scope_t,base_classes=[]): name = self.scopes[-1]+"::"+ns if self.verbose>=2: print "-- Starting "+scope_t+" "+name self.scopes.append(name) self.index[name] = Scope(ns,name,scope_t,base_classes,scope_lineno-1) def pop_scope(self): scope = self.scopes.pop() if self.verbose>=2: print "-- Stopping "+scope return scope def add_function(self, fn): fn = str(fn) if 
self.filter.search(fn): self.index[self.scopes[-1]].function.append((fn, identifier_lineno.get(fn,lexer.lineno-1))) tmp = self.scopes[-1]+"::"+fn if self.verbose==2: print "-- Function declaration "+fn+" "+tmp elif self.verbose==1: print "-- Function declaration "+tmp def get_functions(self,name,quiet=False): if name == "::": name = "" scope = self.index[name] fns=scope.function for key in scope.base_classes: cname = self.find_class(key,scope) if cname is None: if not quiet: print "Defined classes: ",list(self.index.keys()) print "WARNING: Unknown class "+key else: fns += self.get_functions(cname,quiet) return fns def find_class(self,name,scope): if ':' in name: if name in self.index: return name else: return None tmp = scope.abs_name.split(':') name1 = ":".join(tmp[:-1] + [name]) if name1 in self.index: return name1 name2 = "::"+name if name2 in self.index: return name2 return None def __repr__(self): return str(self) def is_baseclass(self,cls,base): '''Returns true if base is a base-class of cls''' if cls in self.index: bases = self.index[cls] elif "::"+cls in self.index: bases = self.index["::"+cls] else: return False #raise IOError, "Unknown class "+cls if base in bases.base_classes: return True for name in bases.base_classes: if self.is_baseclass(name,base): return True return False def __str__(self): ans="" keys = list(self.index.keys()) keys.sort() for key in keys: scope = self.index[key] ans += scope.scope_t+" "+scope.abs_name+"\n" if scope.scope_t == "class": ans += " Base Classes: "+str(scope.base_classes)+"\n" for fn in self.get_functions(scope.abs_name): ans += " "+fn+"\n" else: for fn in scope.function: ans += " "+fn+"\n" return ans def flatten(x): """Flatten nested list""" try: strtypes = basestring except: # for python3 etc strtypes = (str, bytes) result = [] for el in x: if hasattr(el, "__iter__") and not isinstance(el, strtypes): result.extend(flatten(el)) else: result.append(el) return result # # The lexer (and/or a preprocessor) is expected to 
# identify the following
#
#  Punctuation:
#
#
literals = "+-*/%^&|~!<>=:()?.\'\"\\@$;,"

#
reserved = {
    'private' : 'PRIVATE',
    'protected' : 'PROTECTED',
    'public' : 'PUBLIC',

    'bool' : 'BOOL',
    'char' : 'CHAR',
    'double' : 'DOUBLE',
    'float' : 'FLOAT',
    'int' : 'INT',
    'long' : 'LONG',
    'short' : 'SHORT',
    'signed' : 'SIGNED',
    'unsigned' : 'UNSIGNED',
    'void' : 'VOID',
    'wchar_t' : 'WCHAR_T',

    'class' : 'CLASS',
    'enum' : 'ENUM',
    'namespace' : 'NAMESPACE',
    'struct' : 'STRUCT',
    'typename' : 'TYPENAME',
    'union' : 'UNION',

    'const' : 'CONST',
    'volatile' : 'VOLATILE',

    'auto' : 'AUTO',
    'explicit' : 'EXPLICIT',
    'export' : 'EXPORT',
    'extern' : 'EXTERN',
    '__extension__' : 'EXTENSION',
    'friend' : 'FRIEND',
    'inline' : 'INLINE',
    'mutable' : 'MUTABLE',
    'register' : 'REGISTER',
    'static' : 'STATIC',
    'template' : 'TEMPLATE',
    'typedef' : 'TYPEDEF',
    'using' : 'USING',
    'virtual' : 'VIRTUAL',

    'asm' : 'ASM',
    'break' : 'BREAK',
    'case' : 'CASE',
    'catch' : 'CATCH',
    'const_cast' : 'CONST_CAST',
    'continue' : 'CONTINUE',
    'default' : 'DEFAULT',
    'delete' : 'DELETE',
    'do' : 'DO',
    'dynamic_cast' : 'DYNAMIC_CAST',
    'else' : 'ELSE',
    'false' : 'FALSE',
    'for' : 'FOR',
    'goto' : 'GOTO',
    'if' : 'IF',
    'new' : 'NEW',
    'operator' : 'OPERATOR',
    'reinterpret_cast' : 'REINTERPRET_CAST',
    'return' : 'RETURN',
    'sizeof' : 'SIZEOF',
    'static_cast' : 'STATIC_CAST',
    'switch' : 'SWITCH',
    'this' : 'THIS',
    'throw' : 'THROW',
    'true' : 'TRUE',
    'try' : 'TRY',
    'typeid' : 'TYPEID',
    'while' : 'WHILE',
    '"C"' : 'CLiteral',
    '"C++"' : 'CppLiteral',

    '__attribute__' : 'ATTRIBUTE',
    '__cdecl__' : 'CDECL',
    '__typeof' : 'uTYPEOF',
    'typeof' : 'TYPEOF',

    'CXXTEST_STD' : 'CXXTEST_STD'
}

tokens = [
    "CharacterLiteral",
    "FloatingLiteral",
    "Identifier",
    "IntegerLiteral",
    "StringLiteral",
    "RBRACE",
    "LBRACE",
    "RBRACKET",
    "LBRACKET",
    "ARROW",
    "ARROW_STAR",
    "DEC",
    "EQ",
    "GE",
    "INC",
    "LE",
    "LOG_AND",
    "LOG_OR",
    "NE",
    "SHL",
    "SHR",
    "ASS_ADD",
    "ASS_AND",
    "ASS_DIV",
    "ASS_MOD",
    "ASS_MUL",
    "ASS_OR",
    "ASS_SHL",
    "ASS_SHR",
    "ASS_SUB",
    "ASS_XOR",
    "DOT_STAR",
    "ELLIPSIS",
    "SCOPE",
] + list(reserved.values())

t_ignore = " \t\r"

t_LBRACE = r"(\{)|(<%)"
t_RBRACE = r"(\})|(%>)"
t_LBRACKET = r"(\[)|(<:)"
t_RBRACKET = r"(\])|(:>)"
t_ARROW = r"->"
t_ARROW_STAR = r"->\*"
t_DEC = r"--"
t_EQ = r"=="
t_GE = r">="
t_INC = r"\+\+"
t_LE = r"<="
t_LOG_AND = r"&&"
t_LOG_OR = r"\|\|"
t_NE = r"!="
t_SHL = r"<<"
t_SHR = r">>"
t_ASS_ADD = r"\+="
t_ASS_AND = r"&="
t_ASS_DIV = r"/="
t_ASS_MOD = r"%="
t_ASS_MUL = r"\*="
t_ASS_OR = r"\|="
t_ASS_SHL = r"<<="
t_ASS_SHR = r">>="
t_ASS_SUB = r"-="
t_ASS_XOR = r"^="
t_DOT_STAR = r"\.\*"
t_ELLIPSIS = r"\.\.\."
t_SCOPE = r"::"

# Discard comments
def t_COMMENT(t):
    r'(/\*(.|\n)*?\*/)|(//.*?\n)|(\#.*?\n)'
    t.lexer.lineno += t.value.count("\n")

t_IntegerLiteral = r'(0x[0-9A-F]+)|([0-9]+(L){0,1})'
t_FloatingLiteral = r"[0-9]+[eE\.\+-]+[eE\.\+\-0-9]+"
t_CharacterLiteral = r'\'([^\'\\]|\\.)*\''
#t_StringLiteral = r'"([^"\\]|\\.)*"'
def t_StringLiteral(t):
    r'"([^"\\]|\\.)*"'
    t.type = reserved.get(t.value,'StringLiteral')
    return t

def t_Identifier(t):
    r"[a-zA-Z_][a-zA-Z_0-9\.]*"
    t.type = reserved.get(t.value,'Identifier')
    return t


def t_error(t):
    # BUG FIX: was a Python-2-only "print" statement; the single-argument
    # parenthesized form below prints identically on py2 and is valid py3.
    print("Illegal character '%s'" % t.value[0])
    #raise IOError, "Parse error"
    #t.lexer.skip()

def t_newline(t):
    r'[\n]+'
    t.lexer.lineno += len(t.value)

precedence = (
    ( 'right', 'SHIFT_THERE', 'REDUCE_HERE_MOSTLY', 'SCOPE'),
    ( 'nonassoc', 'ELSE', 'INC', 'DEC', '+', '-', '*', '&', 'LBRACKET', 'LBRACE', '<', ':', ')')
    )

start = 'translation_unit'

#
#  The %prec resolves the 14.2-3 ambiguity:
#  Identifier '<' is forced to go through the is-it-a-template-name test
#  All names absorb TEMPLATE with the name, so that no template_test is
#  performed for them.  This requires all potential declarations within an
#  expression to perpetuate this policy and thereby guarantee the ultimate
#  coverage of explicit_instantiation.
# # The %prec also resolves a conflict in identifier : which is forced to be a # shift of a label for a labeled-statement rather than a reduction for the # name of a bit-field or generalised constructor. This is pretty dubious # syntactically but correct for all semantic possibilities. The shift is # only activated when the ambiguity exists at the start of a statement. # In this context a bit-field declaration or constructor definition are not # allowed. # def p_identifier(p): '''identifier : Identifier | CXXTEST_STD '(' Identifier ')' ''' if p[1][0] in ('t','T','c','d'): identifier_lineno[p[1]] = p.lineno(1) p[0] = p[1] def p_id(p): '''id : identifier %prec SHIFT_THERE | template_decl | TEMPLATE id ''' p[0] = get_rest(p) def p_global_scope(p): '''global_scope : SCOPE ''' p[0] = get_rest(p) def p_id_scope(p): '''id_scope : id SCOPE''' p[0] = get_rest(p) def p_id_scope_seq(p): '''id_scope_seq : id_scope | id_scope id_scope_seq ''' p[0] = get_rest(p) # # A :: B :: C; is ambiguous How much is type and how much name ? # The %prec maximises the (type) length which is the 7.1-2 semantic constraint. # def p_nested_id(p): '''nested_id : id %prec SHIFT_THERE | id_scope nested_id ''' p[0] = get_rest(p) def p_scoped_id(p): '''scoped_id : nested_id | global_scope nested_id | id_scope_seq | global_scope id_scope_seq ''' global scope_lineno scope_lineno = lexer.lineno data = flatten(get_rest(p)) if data[0] != None: p[0] = "".join(data) # # destructor_id has to be held back to avoid a conflict with a one's # complement as per 5.3.1-9, It gets put back only when scoped or in a # declarator_id, which is only used as an explicit member name. # Declarations of an unscoped destructor are always parsed as a one's # complement. 
# def p_destructor_id(p): '''destructor_id : '~' id | TEMPLATE destructor_id ''' p[0]=get_rest(p) #def p_template_id(p): # '''template_id : empty # | TEMPLATE # ''' # pass def p_template_decl(p): '''template_decl : identifier '<' nonlgt_seq_opt '>' ''' # # WEH: should we include the lt/gt symbols to indicate that this is a # template class? How is that going to be used later??? # #p[0] = [p[1] ,"<",">"] p[0] = p[1] def p_special_function_id(p): '''special_function_id : conversion_function_id | operator_function_id | TEMPLATE special_function_id ''' p[0]=get_rest(p) def p_nested_special_function_id(p): '''nested_special_function_id : special_function_id | id_scope destructor_id | id_scope nested_special_function_id ''' p[0]=get_rest(p) def p_scoped_special_function_id(p): '''scoped_special_function_id : nested_special_function_id | global_scope nested_special_function_id ''' p[0]=get_rest(p) # declarator-id is all names in all scopes, except reserved words def p_declarator_id(p): '''declarator_id : scoped_id | scoped_special_function_id | destructor_id ''' p[0]=p[1] # # The standard defines pseudo-destructors in terms of type-name, which is # class/enum/typedef, of which class-name is covered by a normal destructor. # pseudo-destructors are supposed to support ~int() in templates, so the # grammar here covers built-in names. Other names are covered by the lack # of identifier/type discrimination. 
# def p_built_in_type_id(p): '''built_in_type_id : built_in_type_specifier | built_in_type_id built_in_type_specifier ''' pass def p_pseudo_destructor_id(p): '''pseudo_destructor_id : built_in_type_id SCOPE '~' built_in_type_id | '~' built_in_type_id | TEMPLATE pseudo_destructor_id ''' pass def p_nested_pseudo_destructor_id(p): '''nested_pseudo_destructor_id : pseudo_destructor_id | id_scope nested_pseudo_destructor_id ''' pass def p_scoped_pseudo_destructor_id(p): '''scoped_pseudo_destructor_id : nested_pseudo_destructor_id | global_scope scoped_pseudo_destructor_id ''' pass #------------------------------------------------------------------------------- # A.2 Lexical conventions #------------------------------------------------------------------------------- # def p_literal(p): '''literal : IntegerLiteral | CharacterLiteral | FloatingLiteral | StringLiteral | TRUE | FALSE ''' pass #------------------------------------------------------------------------------- # A.3 Basic concepts #------------------------------------------------------------------------------- def p_translation_unit(p): '''translation_unit : declaration_seq_opt ''' pass #------------------------------------------------------------------------------- # A.4 Expressions #------------------------------------------------------------------------------- # # primary_expression covers an arbitrary sequence of all names with the # exception of an unscoped destructor, which is parsed as its unary expression # which is the correct disambiguation (when ambiguous). This eliminates the # traditional A(B) meaning A B ambiguity, since we never have to tack an A # onto the front of something that might start with (. The name length got # maximised ab initio. The downside is that semantic interpretation must split # the names up again. 
#
#  Unification of the declaration and expression syntax means that unary and
#  binary pointer declarator operators:
#      int * * name
#  are parsed as binary and unary arithmetic operators (int) * (*name). Since
#  type information is not used
#  ambiguities resulting from a cast
#      (cast)*(value)
#  are resolved to favour the binary rather than the cast unary to ease AST
#  clean-up. The cast-call ambiguity must be resolved to the cast to ensure
#  that (a)(b)c can be parsed.
#
#  The problem of the functional cast ambiguity
#      name(arg)
#  as call or declaration is avoided by maximising the name within the parsing
#  kernel. So primary_id_expression picks up
#      extern long int const var = 5;
#  as an assignment to the syntax parsed as "extern long int const var". The
#  presence of two names is parsed so that "extern long into const" is
#  distinguished from "var" considerably simplifying subsequent
#  semantic resolution.
#
#  The generalised name is a concatenation of potential type-names (scoped
#  identifiers or built-in sequences) plus optionally one of the special names
#  such as an operator-function-id, conversion-function-id or destructor as the
#  final name.
#

def get_rest(p):
    # Return every right-hand-side symbol value of a production as a list,
    # i.e. p[1:] of the YaccProduction.
    return [p[i] for i in range(1, len(p))]

def p_primary_expression(p):
    '''primary_expression : literal
                          | THIS
                          | suffix_decl_specified_ids
                          | abstract_expression %prec REDUCE_HERE_MOSTLY
    '''
    p[0] = get_rest(p)

#
#  Abstract-expression covers the () and [] of abstract-declarators.
#
def p_abstract_expression(p):
    '''abstract_expression : parenthesis_clause
                           | LBRACKET bexpression_opt RBRACKET
                           | TEMPLATE abstract_expression
    '''
    pass

def p_postfix_expression(p):
    '''postfix_expression : primary_expression
                          | postfix_expression parenthesis_clause
                          | postfix_expression LBRACKET bexpression_opt RBRACKET
                          | postfix_expression LBRACKET bexpression_opt RBRACKET attributes
                          | postfix_expression '.' declarator_id
                          | postfix_expression '.' scoped_pseudo_destructor_id
                          | postfix_expression ARROW declarator_id
                          | postfix_expression ARROW scoped_pseudo_destructor_id
                          | postfix_expression INC
                          | postfix_expression DEC
                          | DYNAMIC_CAST '<' nonlgt_seq_opt '>' '(' expression ')'
                          | STATIC_CAST '<' nonlgt_seq_opt '>' '(' expression ')'
                          | REINTERPRET_CAST '<' nonlgt_seq_opt '>' '(' expression ')'
                          | CONST_CAST '<' nonlgt_seq_opt '>' '(' expression ')'
                          | TYPEID parameters_clause
    '''
    #print "HERE",str(p[1])
    p[0] = get_rest(p)

def p_bexpression_opt(p):
    '''bexpression_opt : empty
                       | bexpression
    '''
    pass

def p_bexpression(p):
    '''bexpression : nonbracket_seq
                   | nonbracket_seq bexpression_seq bexpression_clause nonbracket_seq_opt
                   | bexpression_seq bexpression_clause nonbracket_seq_opt
    '''
    pass

def p_bexpression_seq(p):
    '''bexpression_seq : empty
                       | bexpression_seq bexpression_clause nonbracket_seq_opt
    '''
    pass

def p_bexpression_clause(p):
    '''bexpression_clause : LBRACKET bexpression_opt RBRACKET
    '''
    pass

def p_expression_list_opt(p):
    '''expression_list_opt : empty
                           | expression_list
    '''
    pass

def p_expression_list(p):
    '''expression_list : assignment_expression
                       | expression_list ',' assignment_expression
    '''
    pass

def p_unary_expression(p):
    '''unary_expression : postfix_expression
                        | INC cast_expression
                        | DEC cast_expression
                        | ptr_operator cast_expression
                        | suffix_decl_specified_scope star_ptr_operator cast_expression
                        | '+' cast_expression
                        | '-' cast_expression
                        | '!' cast_expression
                        | '~' cast_expression
                        | SIZEOF unary_expression
                        | new_expression
                        | global_scope new_expression
                        | delete_expression
                        | global_scope delete_expression
    '''
    p[0] = get_rest(p)

def p_delete_expression(p):
    '''delete_expression : DELETE cast_expression
    '''
    pass

def p_new_expression(p):
    '''new_expression : NEW new_type_id new_initializer_opt
                      | NEW parameters_clause new_type_id new_initializer_opt
                      | NEW parameters_clause
                      | NEW parameters_clause parameters_clause new_initializer_opt
    '''
    pass

def p_new_type_id(p):
    '''new_type_id : type_specifier ptr_operator_seq_opt
                   | type_specifier new_declarator
                   | type_specifier new_type_id
    '''
    pass

def p_new_declarator(p):
    '''new_declarator : ptr_operator new_declarator
                      | direct_new_declarator
    '''
    pass

def p_direct_new_declarator(p):
    '''direct_new_declarator : LBRACKET bexpression_opt RBRACKET
                             | direct_new_declarator LBRACKET bexpression RBRACKET
    '''
    pass

def p_new_initializer_opt(p):
    '''new_initializer_opt : empty
                           | '(' expression_list_opt ')'
    '''
    pass

#
#  cast-expression is generalised to support a [] as well as a () prefix. This covers the omission of
#  DELETE[] which when followed by a parenthesised expression was ambiguous. It also covers the gcc
#  indexed array initialisation for free.
#
# Standard C++ binary-operator precedence ladder, each level reducing to the
# next; values are accumulated as nested lists via get_rest.
def p_cast_expression(p):
    '''cast_expression : unary_expression
                       | abstract_expression cast_expression
    '''
    p[0] = get_rest(p)

def p_pm_expression(p):
    '''pm_expression : cast_expression
                     | pm_expression DOT_STAR cast_expression
                     | pm_expression ARROW_STAR cast_expression
    '''
    p[0] = get_rest(p)

def p_multiplicative_expression(p):
    '''multiplicative_expression : pm_expression
                                 | multiplicative_expression star_ptr_operator pm_expression
                                 | multiplicative_expression '/' pm_expression
                                 | multiplicative_expression '%' pm_expression
    '''
    p[0] = get_rest(p)

def p_additive_expression(p):
    '''additive_expression : multiplicative_expression
                           | additive_expression '+' multiplicative_expression
                           | additive_expression '-' multiplicative_expression
    '''
    p[0] = get_rest(p)

def p_shift_expression(p):
    '''shift_expression : additive_expression
                        | shift_expression SHL additive_expression
                        | shift_expression SHR additive_expression
    '''
    p[0] = get_rest(p)

#    | relational_expression '<' shift_expression
#    | relational_expression '>' shift_expression
#    | relational_expression LE shift_expression
#    | relational_expression GE shift_expression
def p_relational_expression(p):
    '''relational_expression : shift_expression
    '''
    p[0] = get_rest(p)

def p_equality_expression(p):
    '''equality_expression : relational_expression
                           | equality_expression EQ relational_expression
                           | equality_expression NE relational_expression
    '''
    p[0] = get_rest(p)

def p_and_expression(p):
    '''and_expression : equality_expression
                      | and_expression '&' equality_expression
    '''
    p[0] = get_rest(p)

def p_exclusive_or_expression(p):
    '''exclusive_or_expression : and_expression
                               | exclusive_or_expression '^' and_expression
    '''
    p[0] = get_rest(p)

def p_inclusive_or_expression(p):
    '''inclusive_or_expression : exclusive_or_expression
                               | inclusive_or_expression '|' exclusive_or_expression
    '''
    p[0] = get_rest(p)

def p_logical_and_expression(p):
    '''logical_and_expression : inclusive_or_expression
                              | logical_and_expression LOG_AND inclusive_or_expression
    '''
    p[0] = get_rest(p)

def p_logical_or_expression(p):
    '''logical_or_expression : logical_and_expression
                             | logical_or_expression LOG_OR logical_and_expression
    '''
    p[0] = get_rest(p)

def p_conditional_expression(p):
    '''conditional_expression : logical_or_expression
                              | logical_or_expression '?' expression ':' assignment_expression
    '''
    p[0] = get_rest(p)

#
#  assignment-expression is generalised to cover the simple assignment of a braced initializer in order to
#  contribute to the coverage of parameter-declaration and init-declaration.
#
#    | logical_or_expression assignment_operator assignment_expression
def p_assignment_expression(p):
    '''assignment_expression : conditional_expression
                             | logical_or_expression assignment_operator nonsemicolon_seq
                             | logical_or_expression '=' braced_initializer
                             | throw_expression
    '''
    p[0]=get_rest(p)

def p_assignment_operator(p):
    '''assignment_operator : '='
                           | ASS_ADD
                           | ASS_AND
                           | ASS_DIV
                           | ASS_MOD
                           | ASS_MUL
                           | ASS_OR
                           | ASS_SHL
                           | ASS_SHR
                           | ASS_SUB
                           | ASS_XOR
    '''
    pass

#
#  expression is widely used and usually single-element, so the reductions are arranged so that a
#  single-element expression is returned as is. Multi-element expressions are parsed as a list that
#  may then behave polymorphically as an element or be compacted to an element.
#
def p_expression(p):
    '''expression : assignment_expression
                  | expression_list ',' assignment_expression
    '''
    p[0] = get_rest(p)

def p_constant_expression(p):
    '''constant_expression : conditional_expression
    '''
    pass

#---------------------------------------------------------------------------------------------------
# A.5 Statements
#---------------------------------------------------------------------------------------------------
# Parsing statements is easy once simple_declaration has been generalised to cover expression_statement.
#
#
# The use of extern here is a hack.
The 'extern "C" {}' block gets parsed # as a function, so when nested 'extern "C"' declarations exist, they don't # work because the block is viewed as a list of statements... :( # def p_statement(p): '''statement : compound_statement | declaration_statement | try_block | labeled_statement | selection_statement | iteration_statement | jump_statement ''' pass def p_compound_statement(p): '''compound_statement : LBRACE statement_seq_opt RBRACE ''' pass def p_statement_seq_opt(p): '''statement_seq_opt : empty | statement_seq_opt statement ''' pass # # The dangling else conflict is resolved to the innermost if. # def p_selection_statement(p): '''selection_statement : IF '(' condition ')' statement %prec SHIFT_THERE | IF '(' condition ')' statement ELSE statement | SWITCH '(' condition ')' statement ''' pass def p_condition_opt(p): '''condition_opt : empty | condition ''' pass def p_condition(p): '''condition : nonparen_seq | nonparen_seq condition_seq parameters_clause nonparen_seq_opt | condition_seq parameters_clause nonparen_seq_opt ''' pass def p_condition_seq(p): '''condition_seq : empty | condition_seq parameters_clause nonparen_seq_opt ''' pass def p_labeled_statement(p): '''labeled_statement : identifier ':' statement | CASE constant_expression ':' statement | DEFAULT ':' statement ''' pass def p_try_block(p): '''try_block : TRY compound_statement handler_seq ''' global noExceptionLogic noExceptionLogic=False def p_jump_statement(p): '''jump_statement : BREAK ';' | CONTINUE ';' | RETURN nonsemicolon_seq ';' | GOTO identifier ';' ''' pass def p_iteration_statement(p): '''iteration_statement : WHILE '(' condition ')' statement | DO statement WHILE '(' expression ')' ';' | FOR '(' nonparen_seq_opt ')' statement ''' pass def p_declaration_statement(p): '''declaration_statement : block_declaration ''' pass #--------------------------------------------------------------------------------------------------- # A.6 Declarations 
#---------------------------------------------------------------------------------------------------
def p_compound_declaration(p):
    '''compound_declaration : LBRACE declaration_seq_opt RBRACE
    '''
    pass

def p_declaration_seq_opt(p):
    '''declaration_seq_opt : empty
                           | declaration_seq_opt declaration
    '''
    pass

def p_declaration(p):
    '''declaration : block_declaration
                   | function_definition
                   | template_declaration
                   | explicit_specialization
                   | specialised_declaration
    '''
    pass

def p_specialised_declaration(p):
    '''specialised_declaration : linkage_specification
                               | namespace_definition
                               | TEMPLATE specialised_declaration
    '''
    pass

def p_block_declaration(p):
    '''block_declaration : simple_declaration
                         | specialised_block_declaration
    '''
    pass

def p_specialised_block_declaration(p):
    '''specialised_block_declaration : asm_definition
                                     | namespace_alias_definition
                                     | using_declaration
                                     | using_directive
                                     | TEMPLATE specialised_block_declaration
    '''
    pass

def p_simple_declaration(p):
    '''simple_declaration : ';'
                          | init_declaration ';'
                          | init_declarations ';'
                          | decl_specifier_prefix simple_declaration
    '''
    global _parse_info
    if len(p) == 3:
        # Two-symbol productions: either "declaration ';'" (p[2] is the
        # semicolon) or "decl_specifier_prefix simple_declaration"; pick the
        # declaration part.
        if p[2] == ";":
            decl = p[1]
        else:
            decl = p[2]
        if decl is not None:
            fp = flatten(decl)
            # A name immediately followed by '(' looks like a function
            # declaration; record it (but never the bare word "operator").
            if len(fp) >= 2 and fp[0] is not None and fp[0]!="operator" and fp[1] == '(':
                p[0] = fp[0]
                _parse_info.add_function(fp[0])

#
#  A decl-specifier following a ptr_operator provokes a shift-reduce conflict for * const name which is resolved in favour of the pointer, and implemented by providing versions of decl-specifier guaranteed not to start with a cv_qualifier. decl-specifiers are implemented type-centrically. That is the semantic constraint that there must be a type is exploited to impose structure, but actually eliminate very little syntax. built-in types are multi-name and so need a different policy.
#
#  non-type decl-specifiers are bound to the left-most type in a decl-specifier-seq, by parsing from the right and attaching suffixes to the right-hand type. Finally residual prefixes attach to the left.
#
def p_suffix_built_in_decl_specifier_raw(p):
    '''suffix_built_in_decl_specifier_raw : built_in_type_specifier
                                          | suffix_built_in_decl_specifier_raw built_in_type_specifier
                                          | suffix_built_in_decl_specifier_raw decl_specifier_suffix
    '''
    pass

def p_suffix_built_in_decl_specifier(p):
    '''suffix_built_in_decl_specifier : suffix_built_in_decl_specifier_raw
                                      | TEMPLATE suffix_built_in_decl_specifier
    '''
    pass

#      | id_scope_seq
#      | SCOPE id_scope_seq
def p_suffix_named_decl_specifier(p):
    '''suffix_named_decl_specifier : scoped_id
                                   | elaborate_type_specifier
                                   | suffix_named_decl_specifier decl_specifier_suffix
    '''
    p[0]=get_rest(p)

def p_suffix_named_decl_specifier_bi(p):
    '''suffix_named_decl_specifier_bi : suffix_named_decl_specifier
                                      | suffix_named_decl_specifier suffix_built_in_decl_specifier_raw
    '''
    p[0] = get_rest(p)
    #print "HERE",get_rest(p)

def p_suffix_named_decl_specifiers(p):
    '''suffix_named_decl_specifiers : suffix_named_decl_specifier_bi
                                    | suffix_named_decl_specifiers suffix_named_decl_specifier_bi
    '''
    p[0] = get_rest(p)

def p_suffix_named_decl_specifiers_sf(p):
    '''suffix_named_decl_specifiers_sf : scoped_special_function_id
                                       | suffix_named_decl_specifiers
                                       | suffix_named_decl_specifiers scoped_special_function_id
    '''
    #print "HERE",get_rest(p)
    p[0] = get_rest(p)

def p_suffix_decl_specified_ids(p):
    '''suffix_decl_specified_ids : suffix_built_in_decl_specifier
                                 | suffix_built_in_decl_specifier suffix_named_decl_specifiers_sf
                                 | suffix_named_decl_specifiers_sf
    '''
    # Keep only the named part (the last symbol) when a built-in specifier
    # precedes it.
    if len(p) == 3:
        p[0] = p[2]
    else:
        p[0] = p[1]

def p_suffix_decl_specified_scope(p):
    '''suffix_decl_specified_scope : suffix_named_decl_specifiers SCOPE
                                   | suffix_built_in_decl_specifier suffix_named_decl_specifiers SCOPE
                                   | suffix_built_in_decl_specifier SCOPE
    '''
    p[0] = get_rest(p)

def p_decl_specifier_affix(p):
    '''decl_specifier_affix : storage_class_specifier
                            | function_specifier
                            | FRIEND
                            | TYPEDEF
                            | cv_qualifier
    '''
    pass

def p_decl_specifier_suffix(p):
    '''decl_specifier_suffix : decl_specifier_affix
    '''
    pass

def p_decl_specifier_prefix(p):
    '''decl_specifier_prefix : decl_specifier_affix
                             | TEMPLATE decl_specifier_prefix
    '''
    pass

def p_storage_class_specifier(p):
    '''storage_class_specifier : REGISTER
                               | STATIC
                               | MUTABLE
                               | EXTERN %prec SHIFT_THERE
                               | EXTENSION
                               | AUTO
    '''
    pass

def p_function_specifier(p):
    '''function_specifier : EXPLICIT
                          | INLINE
                          | VIRTUAL
    '''
    pass

def p_type_specifier(p):
    '''type_specifier : simple_type_specifier
                      | elaborate_type_specifier
                      | cv_qualifier
    '''
    pass

def p_elaborate_type_specifier(p):
    '''elaborate_type_specifier : class_specifier
                                | enum_specifier
                                | elaborated_type_specifier
                                | TEMPLATE elaborate_type_specifier
    '''
    pass

def p_simple_type_specifier(p):
    '''simple_type_specifier : scoped_id
                             | scoped_id attributes
                             | built_in_type_specifier
    '''
    p[0] = p[1]

def p_built_in_type_specifier(p):
    '''built_in_type_specifier : Xbuilt_in_type_specifier
                               | Xbuilt_in_type_specifier attributes
    '''
    pass

def p_attributes(p):
    '''attributes : attribute
                  | attributes attribute
    '''
    pass

def p_attribute(p):
    '''attribute : ATTRIBUTE '(' parameters_clause ')'
    '''

def p_Xbuilt_in_type_specifier(p):
    '''Xbuilt_in_type_specifier : CHAR
                                | WCHAR_T
                                | BOOL
                                | SHORT
                                | INT
                                | LONG
                                | SIGNED
                                | UNSIGNED
                                | FLOAT
                                | DOUBLE
                                | VOID
                                | uTYPEOF parameters_clause
                                | TYPEOF parameters_clause
    '''
    pass

#
#  The over-general use of declaration_expression to cover decl-specifier-seq_opt declarator in a function-definition means that
#      class X { };
#  could be a function-definition or a class-specifier.
#      enum X { };
#  could be a function-definition or an enum-specifier.
#  The function-definition is not syntactically valid so resolving the false conflict in favour of the
#  elaborated_type_specifier is correct.
#
def p_elaborated_type_specifier(p):
    '''elaborated_type_specifier : class_key scoped_id %prec SHIFT_THERE
                                 | elaborated_enum_specifier
                                 | TYPENAME scoped_id
    '''
    pass

def p_elaborated_enum_specifier(p):
    '''elaborated_enum_specifier : ENUM scoped_id %prec SHIFT_THERE
    '''
    pass

def p_enum_specifier(p):
    '''enum_specifier : ENUM scoped_id enumerator_clause
                      | ENUM enumerator_clause
    '''
    pass

def p_enumerator_clause(p):
    '''enumerator_clause : LBRACE enumerator_list_ecarb
                         | LBRACE enumerator_list enumerator_list_ecarb
                         | LBRACE enumerator_list ',' enumerator_definition_ecarb
    '''
    pass

def p_enumerator_list_ecarb(p):
    '''enumerator_list_ecarb : RBRACE
    '''
    pass

def p_enumerator_definition_ecarb(p):
    '''enumerator_definition_ecarb : RBRACE
    '''
    pass

def p_enumerator_definition_filler(p):
    '''enumerator_definition_filler : empty
    '''
    pass

def p_enumerator_list_head(p):
    '''enumerator_list_head : enumerator_definition_filler
                            | enumerator_list ',' enumerator_definition_filler
    '''
    pass

def p_enumerator_list(p):
    '''enumerator_list : enumerator_list_head enumerator_definition
    '''
    pass

def p_enumerator_definition(p):
    '''enumerator_definition : enumerator
                             | enumerator '=' constant_expression
    '''
    pass

def p_enumerator(p):
    '''enumerator : identifier
    '''
    pass

def p_namespace_definition(p):
    '''namespace_definition : NAMESPACE scoped_id push_scope compound_declaration
                            | NAMESPACE push_scope compound_declaration
    '''
    # The matching push_scope ran as an embedded action when the namespace
    # header was reduced; pop it now that the body is complete.
    global _parse_info
    scope = _parse_info.pop_scope()

def p_namespace_alias_definition(p):
    '''namespace_alias_definition : NAMESPACE scoped_id '=' scoped_id ';'
    '''
    pass

def p_push_scope(p):
    '''push_scope : empty'''
    # Embedded (mid-rule) action: peek back at the already-shifted symbols of
    # the enclosing namespace_definition.  p[-2] == "namespace" means a name
    # followed (p[-1] is the scoped_id); otherwise the namespace is anonymous.
    global _parse_info
    if p[-2] == "namespace":
        scope=p[-1]
    else:
        scope=""
    _parse_info.push_scope(scope,"namespace")

def p_using_declaration(p):
    '''using_declaration : USING declarator_id ';'
                         | USING TYPENAME declarator_id ';'
    '''
    pass

def p_using_directive(p):
    '''using_directive : USING NAMESPACE scoped_id ';'
    '''
    pass

#    '''asm_definition : ASM '(' StringLiteral ')' ';'
def p_asm_definition(p):
    '''asm_definition : ASM '(' nonparen_seq_opt ')' ';'
    '''
    pass

def p_linkage_specification(p):
    '''linkage_specification : EXTERN CLiteral declaration
                             | EXTERN CLiteral compound_declaration
                             | EXTERN CppLiteral declaration
                             | EXTERN CppLiteral compound_declaration
    '''
    pass

#---------------------------------------------------------------------------------------------------
# A.7 Declarators
#---------------------------------------------------------------------------------------------------
#
# init-declarator is named init_declaration to reflect the embedded decl-specifier-seq_opt
#
def p_init_declarations(p):
    '''init_declarations : assignment_expression ',' init_declaration
                         | init_declarations ',' init_declaration
    '''
    p[0]=get_rest(p)

def p_init_declaration(p):
    '''init_declaration : assignment_expression
    '''
    p[0]=get_rest(p)

def p_star_ptr_operator(p):
    '''star_ptr_operator : '*'
                         | star_ptr_operator cv_qualifier
    '''
    pass

def p_nested_ptr_operator(p):
    '''nested_ptr_operator : star_ptr_operator
                           | id_scope nested_ptr_operator
    '''
    pass

def p_ptr_operator(p):
    '''ptr_operator : '&'
                    | nested_ptr_operator
                    | global_scope nested_ptr_operator
    '''
    pass

def p_ptr_operator_seq(p):
    '''ptr_operator_seq : ptr_operator
                        | ptr_operator ptr_operator_seq
    '''
    pass

#
# Independently coded to localise the shift-reduce conflict: sharing just needs another %prec
#
def p_ptr_operator_seq_opt(p):
    '''ptr_operator_seq_opt : empty %prec SHIFT_THERE
                            | ptr_operator ptr_operator_seq_opt
    '''
    pass

def p_cv_qualifier_seq_opt(p):
    '''cv_qualifier_seq_opt : empty
                            | cv_qualifier_seq_opt cv_qualifier
    '''
    pass

# TODO: verify that we should include attributes here
def p_cv_qualifier(p):
    '''cv_qualifier : CONST
                    | VOLATILE
                    | attributes
    '''
    pass

def p_type_id(p):
    '''type_id : type_specifier abstract_declarator_opt
               | type_specifier type_id
    '''
    pass

def p_abstract_declarator_opt(p):
    '''abstract_declarator_opt : empty
                               | ptr_operator abstract_declarator_opt
                               | direct_abstract_declarator
    '''
    pass

def p_direct_abstract_declarator_opt(p):
    '''direct_abstract_declarator_opt : empty
                                      | direct_abstract_declarator
    '''
    pass

def p_direct_abstract_declarator(p):
    '''direct_abstract_declarator : direct_abstract_declarator_opt parenthesis_clause
                                  | direct_abstract_declarator_opt LBRACKET RBRACKET
                                  | direct_abstract_declarator_opt LBRACKET bexpression RBRACKET
    '''
    pass

def p_parenthesis_clause(p):
    '''parenthesis_clause : parameters_clause cv_qualifier_seq_opt
                          | parameters_clause cv_qualifier_seq_opt exception_specification
    '''
    # The parameter details are discarded; only the bracket shape survives.
    p[0] = ['(',')']

def p_parameters_clause(p):
    '''parameters_clause : '(' condition_opt ')'
    '''
    p[0] = ['(',')']

#
# A typed abstract qualifier such as
#      Class * ...
# looks like a multiply, so pointers are parsed as their binary operation equivalents that
# ultimately terminate with a degenerate right hand term.
#
def p_abstract_pointer_declaration(p):
    '''abstract_pointer_declaration : ptr_operator_seq
                                    | multiplicative_expression star_ptr_operator ptr_operator_seq_opt
    '''
    pass

def p_abstract_parameter_declaration(p):
    '''abstract_parameter_declaration : abstract_pointer_declaration
                                      | and_expression '&'
                                      | and_expression '&' abstract_pointer_declaration
    '''
    pass

def p_special_parameter_declaration(p):
    '''special_parameter_declaration : abstract_parameter_declaration
                                     | abstract_parameter_declaration '=' assignment_expression
                                     | ELLIPSIS
    '''
    pass

def p_parameter_declaration(p):
    '''parameter_declaration : assignment_expression
                             | special_parameter_declaration
                             | decl_specifier_prefix parameter_declaration
    '''
    pass

#
# function_definition includes constructor, destructor, implicit int definitions too. A local destructor is successfully parsed as a function-declaration but the ~ was treated as a unary operator. constructor_head is the prefix ambiguity between a constructor and a member-init-list starting with a bit-field.
# def p_function_definition(p): '''function_definition : ctor_definition | func_definition ''' pass def p_func_definition(p): '''func_definition : assignment_expression function_try_block | assignment_expression function_body | decl_specifier_prefix func_definition ''' global _parse_info if p[2] is not None and p[2][0] == '{': decl = flatten(p[1]) #print "HERE",decl if decl[-1] == ')': decl=decl[-3] else: decl=decl[-1] p[0] = decl if decl != "operator": _parse_info.add_function(decl) else: p[0] = p[2] def p_ctor_definition(p): '''ctor_definition : constructor_head function_try_block | constructor_head function_body | decl_specifier_prefix ctor_definition ''' if p[2] is None or p[2][0] == "try" or p[2][0] == '{': p[0]=p[1] else: p[0]=p[1] def p_constructor_head(p): '''constructor_head : bit_field_init_declaration | constructor_head ',' assignment_expression ''' p[0]=p[1] def p_function_try_block(p): '''function_try_block : TRY function_block handler_seq ''' global noExceptionLogic noExceptionLogic=False p[0] = ['try'] def p_function_block(p): '''function_block : ctor_initializer_opt function_body ''' pass def p_function_body(p): '''function_body : LBRACE nonbrace_seq_opt RBRACE ''' p[0] = ['{','}'] def p_initializer_clause(p): '''initializer_clause : assignment_expression | braced_initializer ''' pass def p_braced_initializer(p): '''braced_initializer : LBRACE initializer_list RBRACE | LBRACE initializer_list ',' RBRACE | LBRACE RBRACE ''' pass def p_initializer_list(p): '''initializer_list : initializer_clause | initializer_list ',' initializer_clause ''' pass #--------------------------------------------------------------------------------------------------- # A.8 Classes #--------------------------------------------------------------------------------------------------- # # An anonymous bit-field declaration may look very like inheritance: # const int B = 3; # class A : B ; # The two usages are too distant to try to create and enforce a common prefix so we have 
to resort to # a parser hack by backtracking. Inheritance is much the most likely so we mark the input stream context # and try to parse a base-clause. If we successfully reach a { the base-clause is ok and inheritance was # the correct choice so we unmark and continue. If we fail to find the { an error token causes # back-tracking to the alternative parse in elaborated_type_specifier which regenerates the : and # declares unconditional success. # def p_class_specifier_head(p): '''class_specifier_head : class_key scoped_id ':' base_specifier_list LBRACE | class_key ':' base_specifier_list LBRACE | class_key scoped_id LBRACE | class_key LBRACE ''' global _parse_info base_classes=[] if len(p) == 6: scope = p[2] base_classes = p[4] elif len(p) == 4: scope = p[2] elif len(p) == 5: base_classes = p[3] else: scope = "" _parse_info.push_scope(scope,p[1],base_classes) def p_class_key(p): '''class_key : CLASS | STRUCT | UNION ''' p[0] = p[1] def p_class_specifier(p): '''class_specifier : class_specifier_head member_specification_opt RBRACE ''' scope = _parse_info.pop_scope() def p_member_specification_opt(p): '''member_specification_opt : empty | member_specification_opt member_declaration ''' pass def p_member_declaration(p): '''member_declaration : accessibility_specifier | simple_member_declaration | function_definition | using_declaration | template_declaration ''' p[0] = get_rest(p) #print "Decl",get_rest(p) # # The generality of constructor names (there need be no parenthesised argument list) means that that # name : f(g), h(i) # could be the start of a constructor or the start of an anonymous bit-field. An ambiguity is avoided by # parsing the ctor-initializer of a function_definition as a bit-field. 
#
def p_simple_member_declaration(p):
    '''simple_member_declaration : ';'
                                 | assignment_expression ';'
                                 | constructor_head ';'
                                 | member_init_declarations ';'
                                 | decl_specifier_prefix simple_member_declaration
    '''
    global _parse_info
    decl = flatten(get_rest(p))
    # "name ( ... ) ;" -- the '(' sits two tokens before the trailing ';',
    # so the member function name is the token at decl[-4].
    if len(decl) >= 4 and decl[-3] == "(":
        _parse_info.add_function(decl[-4])

def p_member_init_declarations(p):
    '''member_init_declarations : assignment_expression ',' member_init_declaration
                                | constructor_head ',' bit_field_init_declaration
                                | member_init_declarations ',' member_init_declaration
    '''
    pass

def p_member_init_declaration(p):
    '''member_init_declaration : assignment_expression
                               | bit_field_init_declaration
    '''
    pass

def p_accessibility_specifier(p):
    '''accessibility_specifier : access_specifier ':'
    '''
    pass

def p_bit_field_declaration(p):
    '''bit_field_declaration : assignment_expression ':' bit_field_width
                             | ':' bit_field_width
    '''
    # Only the named form propagates a value (the declarator expression).
    if len(p) == 4:
        p[0]=p[1]

def p_bit_field_width(p):
    '''bit_field_width : logical_or_expression
                       | logical_or_expression '?' bit_field_width ':' bit_field_width
    '''
    pass

def p_bit_field_init_declaration(p):
    '''bit_field_init_declaration : bit_field_declaration
                                  | bit_field_declaration '=' initializer_clause
    '''
    pass

#---------------------------------------------------------------------------------------------------
# A.9 Derived classes
#---------------------------------------------------------------------------------------------------
def p_base_specifier_list(p):
    '''base_specifier_list : base_specifier
                           | base_specifier_list ',' base_specifier
    '''
    # Accumulate base-class names into a flat list.
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]+[p[3]]

def p_base_specifier(p):
    '''base_specifier : scoped_id
                      | access_specifier base_specifier
                      | VIRTUAL base_specifier
    '''
    # Strip access/virtual qualifiers, keeping only the base-class name.
    if len(p) == 2:
        p[0] = p[1]
    else:
        p[0] = p[2]

def p_access_specifier(p):
    '''access_specifier : PRIVATE
                        | PROTECTED
                        | PUBLIC
    '''
    pass

#---------------------------------------------------------------------------------------------------
# A.10 Special member functions
#---------------------------------------------------------------------------------------------------
def p_conversion_function_id(p):
    '''conversion_function_id : OPERATOR conversion_type_id
    '''
    p[0] = ['operator']

def p_conversion_type_id(p):
    '''conversion_type_id : type_specifier ptr_operator_seq_opt
                          | type_specifier conversion_type_id
    '''
    pass

#
#  Ctor-initialisers can look like a bit field declaration, given the generalisation of names:
#      Class(Type) : m1(1), m2(2) { }
#      NonClass(bit_field) : int(2), second_variable, ...
#  The grammar below is used within a function_try_block or function_definition.
#  See simple_member_declaration for use in normal member function_definition.
#
def p_ctor_initializer_opt(p):
    '''ctor_initializer_opt : empty
    | ctor_initializer
    '''
    pass

def p_ctor_initializer(p):
    '''ctor_initializer : ':' mem_initializer_list
    '''
    pass

def p_mem_initializer_list(p):
    '''mem_initializer_list : mem_initializer
    | mem_initializer_list_head mem_initializer
    '''
    pass

def p_mem_initializer_list_head(p):
    '''mem_initializer_list_head : mem_initializer_list ','
    '''
    pass

def p_mem_initializer(p):
    '''mem_initializer : mem_initializer_id '(' expression_list_opt ')'
    '''
    pass

def p_mem_initializer_id(p):
    '''mem_initializer_id : scoped_id
    '''
    pass

#---------------------------------------------------------------------------------------------------
# A.11 Overloading
#---------------------------------------------------------------------------------------------------
def p_operator_function_id(p):
    '''operator_function_id : OPERATOR operator
    | OPERATOR '(' ')'
    | OPERATOR LBRACKET RBRACKET
    | OPERATOR '<'
    | OPERATOR '>'
    | OPERATOR operator '<' nonlgt_seq_opt '>'
    '''
    # All operator-function names collapse to the generic id "operator".
    p[0] = ["operator"]

#
# It is not clear from the ANSI standard whether spaces are permitted in delete[]. If not then it can
# be recognised and returned as DELETE_ARRAY by the lexer. Assuming spaces are permitted there is an
# ambiguity created by the over generalised nature of expressions. operator new is a valid delarator-id
# which we may have an undimensioned array of. Semantic rubbish, but syntactically valid. Since the
# array form is covered by the declarator consideration we can exclude the operator here. The need
# for a semantic rescue can be eliminated at the expense of a couple of shift-reduce conflicts by
# removing the comments on the next four lines.
#
def p_operator(p):
    '''operator : NEW
    | DELETE
    | '+'
    | '-'
    | '*'
    | '/'
    | '%'
    | '^'
    | '&'
    | '|'
    | '~'
    | '!'
    | '='
    | ASS_ADD
    | ASS_SUB
    | ASS_MUL
    | ASS_DIV
    | ASS_MOD
    | ASS_XOR
    | ASS_AND
    | ASS_OR
    | SHL
    | SHR
    | ASS_SHR
    | ASS_SHL
    | EQ
    | NE
    | LE
    | GE
    | LOG_AND
    | LOG_OR
    | INC
    | DEC
    | ','
    | ARROW_STAR
    | ARROW
    '''
    p[0]=p[1]

#    | IF
#    | SWITCH
#    | WHILE
#    | FOR
#    | DO

def p_reserved(p):
    '''reserved : PRIVATE
    | CLiteral
    | CppLiteral
    | IF
    | SWITCH
    | WHILE
    | FOR
    | DO
    | PROTECTED
    | PUBLIC
    | BOOL
    | CHAR
    | DOUBLE
    | FLOAT
    | INT
    | LONG
    | SHORT
    | SIGNED
    | UNSIGNED
    | VOID
    | WCHAR_T
    | CLASS
    | ENUM
    | NAMESPACE
    | STRUCT
    | TYPENAME
    | UNION
    | CONST
    | VOLATILE
    | AUTO
    | EXPLICIT
    | EXPORT
    | EXTERN
    | FRIEND
    | INLINE
    | MUTABLE
    | REGISTER
    | STATIC
    | TEMPLATE
    | TYPEDEF
    | USING
    | VIRTUAL
    | ASM
    | BREAK
    | CASE
    | CATCH
    | CONST_CAST
    | CONTINUE
    | DEFAULT
    | DYNAMIC_CAST
    | ELSE
    | FALSE
    | GOTO
    | OPERATOR
    | REINTERPRET_CAST
    | RETURN
    | SIZEOF
    | STATIC_CAST
    | THIS
    | THROW
    | TRUE
    | TRY
    | TYPEID
    | ATTRIBUTE
    | CDECL
    | TYPEOF
    | uTYPEOF
    '''
    # Seeing try/catch/throw anywhere in the input marks the translation
    # unit as using exception handling.
    if p[1] in ('try', 'catch', 'throw'):
        global noExceptionLogic
        noExceptionLogic=False

#---------------------------------------------------------------------------------------------------
# A.12 Templates
#---------------------------------------------------------------------------------------------------
def p_template_declaration(p):
    '''template_declaration : template_parameter_clause declaration
    | EXPORT template_declaration
    '''
    pass

def p_template_parameter_clause(p):
    '''template_parameter_clause : TEMPLATE '<' nonlgt_seq_opt '>'
    '''
    pass

#
# Generalised naming makes identifier a valid declaration, so TEMPLATE identifier is too.
# The TEMPLATE prefix is therefore folded into all names, parenthesis_clause and decl_specifier_prefix.
# # explicit_instantiation: TEMPLATE declaration # def p_explicit_specialization(p): '''explicit_specialization : TEMPLATE '<' '>' declaration ''' pass #--------------------------------------------------------------------------------------------------- # A.13 Exception Handling #--------------------------------------------------------------------------------------------------- def p_handler_seq(p): '''handler_seq : handler | handler handler_seq ''' pass def p_handler(p): '''handler : CATCH '(' exception_declaration ')' compound_statement ''' global noExceptionLogic noExceptionLogic=False def p_exception_declaration(p): '''exception_declaration : parameter_declaration ''' pass def p_throw_expression(p): '''throw_expression : THROW | THROW assignment_expression ''' global noExceptionLogic noExceptionLogic=False def p_exception_specification(p): '''exception_specification : THROW '(' ')' | THROW '(' type_id_list ')' ''' global noExceptionLogic noExceptionLogic=False def p_type_id_list(p): '''type_id_list : type_id | type_id_list ',' type_id ''' pass #--------------------------------------------------------------------------------------------------- # Misc productions #--------------------------------------------------------------------------------------------------- def p_nonsemicolon_seq(p): '''nonsemicolon_seq : empty | nonsemicolon_seq nonsemicolon ''' pass def p_nonsemicolon(p): '''nonsemicolon : misc | '(' | ')' | '<' | '>' | LBRACKET nonbracket_seq_opt RBRACKET | LBRACE nonbrace_seq_opt RBRACE ''' pass def p_nonparen_seq_opt(p): '''nonparen_seq_opt : empty | nonparen_seq_opt nonparen ''' pass def p_nonparen_seq(p): '''nonparen_seq : nonparen | nonparen_seq nonparen ''' pass def p_nonparen(p): '''nonparen : misc | '<' | '>' | ';' | LBRACKET nonbracket_seq_opt RBRACKET | LBRACE nonbrace_seq_opt RBRACE ''' pass def p_nonbracket_seq_opt(p): '''nonbracket_seq_opt : empty | nonbracket_seq_opt nonbracket ''' pass def p_nonbracket_seq(p): '''nonbracket_seq : nonbracket 
| nonbracket_seq nonbracket ''' pass def p_nonbracket(p): '''nonbracket : misc | '<' | '>' | '(' | ')' | ';' | LBRACKET nonbracket_seq_opt RBRACKET | LBRACE nonbrace_seq_opt RBRACE ''' pass def p_nonbrace_seq_opt(p): '''nonbrace_seq_opt : empty | nonbrace_seq_opt nonbrace ''' pass def p_nonbrace(p): '''nonbrace : misc | '<' | '>' | '(' | ')' | ';' | LBRACKET nonbracket_seq_opt RBRACKET | LBRACE nonbrace_seq_opt RBRACE ''' pass def p_nonlgt_seq_opt(p): '''nonlgt_seq_opt : empty | nonlgt_seq_opt nonlgt ''' pass def p_nonlgt(p): '''nonlgt : misc | '(' | ')' | LBRACKET nonbracket_seq_opt RBRACKET | '<' nonlgt_seq_opt '>' | ';' ''' pass def p_misc(p): '''misc : operator | identifier | IntegerLiteral | CharacterLiteral | FloatingLiteral | StringLiteral | reserved | '?' | ':' | '.' | SCOPE | ELLIPSIS | EXTENSION ''' pass def p_empty(p): '''empty : ''' pass # # Compute column. # input is the input text string # token is a token instance # def _find_column(input,token): ''' TODO ''' i = token.lexpos while i > 0: if input[i] == '\n': break i -= 1 column = (token.lexpos - i)+1 return column def p_error(p): if p is None: tmp = "Syntax error at end of file." else: tmp = "Syntax error at token " if p.type is "": tmp = tmp + "''" else: tmp = tmp + str(p.type) tmp = tmp + " with value '"+str(p.value)+"'" tmp = tmp + " in line " + str(lexer.lineno-1) tmp = tmp + " at column "+str(_find_column(_parsedata,p)) raise IOError( tmp ) # # The function that performs the parsing # def parse_cpp(data=None, filename=None, debug=0, optimize=0, verbose=False, func_filter=None): if debug > 0: print "Debugging parse_cpp!" # # Always remove the parser.out file, which is generated to create debugging # if os.path.exists("parser.out"): os.remove("parser.out") # # Remove the parsetab.py* files. These apparently need to be removed # to ensure the creation of a parser.out file. 
# if os.path.exists("parsetab.py"): os.remove("parsetab.py") if os.path.exists("parsetab.pyc"): os.remove("parsetab.pyc") global debugging debugging=True # # Build lexer # global lexer lexer = lex.lex() # # Initialize parse object # global _parse_info _parse_info = CppInfo(filter=func_filter) _parse_info.verbose=verbose # # Build yaccer # write_table = not os.path.exists("parsetab.py") yacc.yacc(debug=debug, optimize=optimize, write_tables=write_table) # # Parse the file # global _parsedata if not data is None: _parsedata=data ply_init(_parsedata) yacc.parse(data,debug=debug) elif not filename is None: f = open(filename) data = f.read() f.close() _parsedata=data ply_init(_parsedata) yacc.parse(data, debug=debug) else: return None # if not noExceptionLogic: _parse_info.noExceptionLogic = False else: for key in identifier_lineno: if 'ASSERT_THROWS' in key: _parse_info.noExceptionLogic = False break _parse_info.noExceptionLogic = True # return _parse_info import sys if __name__ == '__main__': # # This MAIN routine parses a sequence of files provided at the command # line. If '-v' is included, then a verbose parsing output is # generated. # for arg in sys.argv[1:]: if arg == "-v": continue print "Parsing file '"+arg+"'" if '-v' in sys.argv: parse_cpp(filename=arg,debug=2,verbose=2) else: parse_cpp(filename=arg,verbose=2) # # Print the _parse_info object summary for this file. # This illustrates how class inheritance can be used to # deduce class members. # print str(_parse_info) ================================================ FILE: cxxtest/python/cxxtest/cxxtest_fog.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. 
# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- # # TODO: add line number info # TODO: add test function names # from __future__ import division import sys import re #from os.path import abspath, dirname #sys.path.insert(0, dirname(dirname(abspath(__file__)))) #sys.path.insert(0, dirname(dirname(abspath(__file__)))+"/cxx_parse") from cxxtest_misc import abort import cxx_parser import re def cstr( str ): '''Convert a string to its C representation''' return '"' + re.sub('\\\\', '\\\\\\\\', str ) + '"' def scanInputFiles(files, _options): '''Scan all input files for test suites''' suites=[] for file in files: try: print "Parsing file "+file, sys.stdout.flush() parse_info = cxx_parser.parse_cpp(filename=file,optimize=1) except IOError, err: print " error." print str(err) continue print "done." sys.stdout.flush() # # WEH: see if it really makes sense to use parse information to # initialize this data. I don't think so... 
# _options.haveStandardLibrary=1 if not parse_info.noExceptionLogic: _options.haveExceptionHandling=1 # keys = list(parse_info.index.keys()) tpat = re.compile("[Tt][Ee][Ss][Tt]") for key in keys: if parse_info.index[key].scope_t == "class" and parse_info.is_baseclass(key,"CxxTest::TestSuite"): name=parse_info.index[key].name suite = { 'name' : name, 'file' : file, 'cfile' : cstr(file), 'line' : str(parse_info.index[key].lineno), 'generated' : 0, 'object' : 'suite_%s' % name, 'dobject' : 'suiteDescription_%s' % name, 'tlist' : 'Tests_%s' % name, 'tests' : [], 'lines' : [] } for fn in parse_info.get_functions(key,quiet=True): tname = fn[0] lineno = str(fn[1]) if tname.startswith('createSuite'): # Indicate that we're using a dynamically generated test suite suite['create'] = str(lineno) # (unknown line) if tname.startswith('destroySuite'): # Indicate that we're using a dynamically generated test suite suite['destroy'] = str(lineno) # (unknown line) if not tpat.match(tname): # Skip non-test methods continue test = { 'name' : tname, 'suite' : suite, 'class' : 'TestDescription_suite_%s_%s' % (suite['name'], tname), 'object' : 'testDescription_suite_%s_%s' % (suite['name'], tname), 'line' : lineno, } suite['tests'].append(test) suites.append(suite) if not _options.root: ntests = 0 for suite in suites: ntests += len(suite['tests']) if ntests == 0: abort( 'No tests defined' ) # return [_options, suites] ================================================ FILE: cxxtest/python/cxxtest/cxxtest_misc.py ================================================ #!/usr/bin/python #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. 
Government retains certain rights in this software. #------------------------------------------------------------------------- import sys def abort( problem ): '''Print error message and exit''' sys.stderr.write( '\n' ) sys.stderr.write( problem ) sys.stderr.write( '\n\n' ) sys.exit(2) ================================================ FILE: cxxtest/python/cxxtest/cxxtest_parser.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- from __future__ import division import codecs import re #import sys #import getopt #import glob from cxxtest.cxxtest_misc import abort # Global variables suites = [] suite = None inBlock = 0 options=None def scanInputFiles(files, _options): '''Scan all input files for test suites''' global options options=_options for file in files: scanInputFile(file) global suites if len(suites) is 0 and not options.root: abort( 'No tests defined' ) return [options,suites] lineCont_re = re.compile('(.*)\\\s*$') def scanInputFile(fileName): '''Scan single input file for test suites''' # mode 'rb' is problematic in python3 - byte arrays don't behave the same as # strings. # As far as the choice of the default encoding: utf-8 chews through # everything that the previous ascii codec could, plus most of new code. # TODO: figure out how to do this properly - like autodetect encoding from # file header. 
file = codecs.open(fileName, mode='r', encoding='utf-8') prev = "" lineNo = 0 contNo = 0 while 1: line = file.readline() if not line: break lineNo += 1 m = lineCont_re.match(line) if m: prev += m.group(1) + " " contNo += 1 else: scanInputLine( fileName, lineNo - contNo, prev + line ) contNo = 0 prev = "" if contNo: scanInputLine( fileName, lineNo - contNo, prev + line ) closeSuite() file.close() def scanInputLine( fileName, lineNo, line ): '''Scan single input line for interesting stuff''' scanLineForExceptionHandling( line ) scanLineForStandardLibrary( line ) scanLineForSuiteStart( fileName, lineNo, line ) global suite if suite: scanLineInsideSuite( suite, lineNo, line ) def scanLineInsideSuite( suite, lineNo, line ): '''Analyze line which is part of a suite''' global inBlock if lineBelongsToSuite( suite, lineNo, line ): scanLineForTest( suite, lineNo, line ) scanLineForCreate( suite, lineNo, line ) scanLineForDestroy( suite, lineNo, line ) def lineBelongsToSuite( suite, lineNo, line ): '''Returns whether current line is part of the current suite. 
This can be false when we are in a generated suite outside of CXXTEST_CODE() blocks If the suite is generated, adds the line to the list of lines''' if not suite['generated']: return 1 global inBlock if not inBlock: inBlock = lineStartsBlock( line ) if inBlock: inBlock = addLineToBlock( suite, lineNo, line ) return inBlock std_re = re.compile( r"\b(std\s*::|CXXTEST_STD|using\s+namespace\s+std\b|^\s*\#\s*include\s+<[a-z0-9]+>)" ) def scanLineForStandardLibrary( line ): '''Check if current line uses standard library''' global options if not options.haveStandardLibrary and std_re.search(line): if not options.noStandardLibrary: options.haveStandardLibrary = 1 exception_re = re.compile( r"\b(throw|try|catch|TSM?_ASSERT_THROWS[A-Z_]*)\b" ) def scanLineForExceptionHandling( line ): '''Check if current line uses exception handling''' global options if not options.haveExceptionHandling and exception_re.search(line): if not options.noExceptionHandling: options.haveExceptionHandling = 1 classdef = '(?:::\s*)?(?:\w+\s*::\s*)*\w+' baseclassdef = '(?:public|private|protected)\s+%s' % (classdef,) general_suite = r"\bclass\s+(%s)\s*:(?:\s*%s\s*,)*\s*public\s+" \ % (classdef, baseclassdef,) testsuite = '(?:(?:::)?\s*CxxTest\s*::\s*)?TestSuite' suites_re = { re.compile( general_suite + testsuite ) : None } generatedSuite_re = re.compile( r'\bCXXTEST_SUITE\s*\(\s*(\w*)\s*\)' ) def scanLineForSuiteStart( fileName, lineNo, line ): '''Check if current line starts a new test suite''' for i in list(suites_re.items()): m = i[0].search( line ) if m: suite = startSuite( m.group(1), fileName, lineNo, 0 ) if i[1] is not None: for test in i[1]['tests']: addTest(suite, test['name'], test['line']) break m = generatedSuite_re.search( line ) if m: sys.stdout.write( "%s:%s: Warning: Inline test suites are deprecated.\n" % (fileName, lineNo) ) startSuite( m.group(1), fileName, lineNo, 1 ) def startSuite( name, file, line, generated ): '''Start scanning a new suite''' global suite closeSuite() 
object_name = name.replace(':',"_") suite = { 'name' : name, 'file' : file, 'cfile' : cstr(file), 'line' : line, 'generated' : generated, 'object' : 'suite_%s' % object_name, 'dobject' : 'suiteDescription_%s' % object_name, 'tlist' : 'Tests_%s' % object_name, 'tests' : [], 'lines' : [] } suites_re[re.compile( general_suite + name )] = suite return suite def lineStartsBlock( line ): '''Check if current line starts a new CXXTEST_CODE() block''' return re.search( r'\bCXXTEST_CODE\s*\(', line ) is not None test_re = re.compile( r'^([^/]|/[^/])*\bvoid\s+([Tt]est\w+)\s*\(\s*(void)?\s*\)' ) def scanLineForTest( suite, lineNo, line ): '''Check if current line starts a test''' m = test_re.search( line ) if m: addTest( suite, m.group(2), lineNo ) def addTest( suite, name, line ): '''Add a test function to the current suite''' test = { 'name' : name, 'suite' : suite, 'class' : 'TestDescription_%s_%s' % (suite['object'], name), 'object' : 'testDescription_%s_%s' % (suite['object'], name), 'line' : line, } suite['tests'].append( test ) def addLineToBlock( suite, lineNo, line ): '''Append the line to the current CXXTEST_CODE() block''' line = fixBlockLine( suite, lineNo, line ) line = re.sub( r'^.*\{\{', '', line ) e = re.search( r'\}\}', line ) if e: line = line[:e.start()] suite['lines'].append( line ) return e is None def fixBlockLine( suite, lineNo, line): '''Change all [E]TS_ macros used in a line to _[E]TS_ macros with the correct file/line''' return re.sub( r'\b(E?TSM?_(ASSERT[A-Z_]*|FAIL))\s*\(', r'_\1(%s,%s,' % (suite['cfile'], lineNo), line, 0 ) create_re = re.compile( r'\bstatic\s+\w+\s*\*\s*createSuite\s*\(\s*(void)?\s*\)' ) def scanLineForCreate( suite, lineNo, line ): '''Check if current line defines a createSuite() function''' if create_re.search( line ): addSuiteCreateDestroy( suite, 'create', lineNo ) destroy_re = re.compile( r'\bstatic\s+void\s+destroySuite\s*\(\s*\w+\s*\*\s*\w*\s*\)' ) def scanLineForDestroy( suite, lineNo, line ): '''Check if current line 
defines a destroySuite() function''' if destroy_re.search( line ): addSuiteCreateDestroy( suite, 'destroy', lineNo ) def cstr( s ): '''Convert a string to its C representation''' return '"' + s.replace( '\\', '\\\\' ) + '"' def addSuiteCreateDestroy( suite, which, line ): '''Add createSuite()/destroySuite() to current suite''' if which in suite: abort( '%s:%s: %sSuite() already declared' % ( suite['file'], str(line), which ) ) suite[which] = line def closeSuite(): '''Close current suite and add it to the list if valid''' global suite if suite is not None: if len(suite['tests']) is not 0: verifySuite(suite) rememberSuite(suite) suite = None def verifySuite(suite): '''Verify current suite is legal''' if 'create' in suite and 'destroy' not in suite: abort( '%s:%s: Suite %s has createSuite() but no destroySuite()' % (suite['file'], suite['create'], suite['name']) ) elif 'destroy' in suite and 'create' not in suite: abort( '%s:%s: Suite %s has destroySuite() but no createSuite()' % (suite['file'], suite['destroy'], suite['name']) ) def rememberSuite(suite): '''Add current suite to list''' global suites suites.append( suite ) ================================================ FILE: cxxtest/python/cxxtest/cxxtestgen.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- # vim: fileencoding=utf-8 from __future__ import division # the above import important for forward-compatibility with python3, # which is already the default in archlinux! 
__all__ = ['main']

import __release__
import os
import sys
import re
import glob
from optparse import OptionParser
import cxxtest_parser
try:
    # The FOG parser requires the third-party 'ply' package; fall back to
    # the regex-based cxxtest_parser when it is unavailable.
    import cxxtest_fog
    imported_fog=True
except ImportError:
    imported_fog=False

from cxxtest_misc import abort

# Module-level state shared across the generator functions.
options = []
suites = []

wrotePreamble = 0
wroteWorld = 0
lastIncluded = ''

def main(args=sys.argv):
    '''The main program'''
    #
    # Reset global state
    #
    global wrotePreamble
    wrotePreamble=0
    global wroteWorld
    wroteWorld=0
    global lastIncluded
    lastIncluded = ''

    global suites
    global options
    files = parseCommandline(args)
    if imported_fog and options.fog:
        [options,suites] = cxxtest_fog.scanInputFiles( files, options )
    else:
        [options,suites] = cxxtest_parser.scanInputFiles( files, options )
    writeOutput()

def parseCommandline(args):
    '''Analyze command line arguments'''
    global imported_fog
    global options
    parser = OptionParser("%prog [options] [<filename> ...]")
    parser.add_option("--version",
                      action="store_true", dest="version", default=False,
                      help="Write the CxxTest version.")
    parser.add_option("-o", "--output",
                      dest="outputFileName", default=None, metavar="NAME",
                      help="Write output to file NAME.")
    parser.add_option("-w","--world", dest="world", default="cxxtest",
                      help="The label of the tests, used to name the XML results.")
    parser.add_option("", "--include", action="append",
                      dest="headers", default=[], metavar="HEADER",
                      help="Include file HEADER in the test runner before other headers.")
    parser.add_option("", "--abort-on-fail",
                      action="store_true", dest="abortOnFail", default=False,
                      help="Abort tests on failed asserts (like xUnit).")
    parser.add_option("", "--main",
                      action="store", dest="main", default="main",
                      help="Specify an alternative name for the main() function.")
    parser.add_option("", "--headers",
                      action="store", dest="header_filename", default=None,
                      help="Specify a filename that contains a list of header files that are processed to generate a test runner.")
    parser.add_option("", "--runner",
                      dest="runner", default="", metavar="CLASS",
                      help="Create a test runner that processes test events using the class CxxTest::CLASS.")
    parser.add_option("", "--gui",
                      dest="gui", metavar="CLASS",
                      help="Create a GUI test runner that processes test events using the class CxxTest::CLASS. (deprecated)")
    parser.add_option("", "--error-printer",
                      action="store_true", dest="error_printer", default=False,
                      help="Create a test runner using the ErrorPrinter class, and allow the use of the standard library.")
    parser.add_option("", "--xunit-printer",
                      action="store_true", dest="xunit_printer", default=False,
                      help="Create a test runner using the XUnitPrinter class.")
    parser.add_option("", "--xunit-file", dest="xunit_file", default="",
                      help="The file to which the XML summary is written for test runners using the XUnitPrinter class. The default XML filename is TEST-<world>.xml, where <world> is the value of the --world option. (default: cxxtest)")
    parser.add_option("", "--have-std",
                      action="store_true", dest="haveStandardLibrary", default=False,
                      help="Use the standard library (even if not found in tests).")
    parser.add_option("", "--no-std",
                      action="store_true", dest="noStandardLibrary", default=False,
                      help="Do not use standard library (even if found in tests).")
    parser.add_option("", "--have-eh",
                      action="store_true", dest="haveExceptionHandling", default=False,
                      help="Use exception handling (even if not found in tests).")
    parser.add_option("", "--no-eh",
                      action="store_true", dest="noExceptionHandling", default=False,
                      help="Do not use exception handling (even if found in tests).")
    parser.add_option("", "--longlong",
                      dest="longlong", default=None, metavar="TYPE",
                      help="Use TYPE as for long long integers. (default: not supported)")
    parser.add_option("", "--no-static-init",
                      action="store_true", dest="noStaticInit", default=False,
                      help="Do not rely on static initialization in the test runner.")
    parser.add_option("", "--template",
                      dest="templateFileName", default=None, metavar="TEMPLATE",
                      help="Generate the test runner using file TEMPLATE to define a template.")
    parser.add_option("", "--root",
                      action="store_true", dest="root", default=False,
                      help="Write the main() function and global data for a test runner.")
    parser.add_option("", "--part",
                      action="store_true", dest="part", default=False,
                      help="Write the tester classes for a test runner.")
    #parser.add_option("", "--factor",
                      #action="store_true", dest="factor", default=False,
                      #help="Declare the _CXXTEST_FACTOR macro. (deprecated)")
    if imported_fog:
        fog_help = "Use new FOG C++ parser"
    else:
        fog_help = "Use new FOG C++ parser (disabled)"
    parser.add_option("-f", "--fog-parser",
                      action="store_true", dest="fog", default=False,
                      help=fog_help )

    (options, args) = parser.parse_args(args=args)

    # A --headers file lists additional input headers, one per line.
    if not options.header_filename is None:
        if not os.path.exists(options.header_filename):
            abort( "ERROR: the file '%s' does not exist!" % options.header_filename )
        INPUT = open(options.header_filename)
        headers = [line.strip() for line in INPUT]
        args.extend( headers )
        INPUT.close()

    if options.fog and not imported_fog:
        abort( "Cannot use the FOG parser. Check that the 'ply' package is installed. The 'ordereddict' package is also required if running Python 2.6")

    if options.version:
        printVersion()

    # the cxxtest builder relies on this behaviour! don't remove
    if options.runner == 'none':
        options.runner = None

    if options.xunit_printer or options.runner == "XUnitPrinter":
        options.xunit_printer=True
        options.runner="XUnitPrinter"
        if len(args) > 1:
            if options.xunit_file == "":
                if options.world == "":
                    options.world = "cxxtest"
                options.xunit_file="TEST-"+options.world+".xml"
        elif options.xunit_file == "":
            if options.world == "":
                options.world = "cxxtest"
            options.xunit_file="TEST-"+options.world+".xml"

    if options.error_printer:
        options.runner= "ErrorPrinter"
        options.haveStandardLibrary = True

    if options.noStaticInit and (options.root or options.part):
        abort( '--no-static-init cannot be used with --root/--part' )

    if options.gui and not options.runner:
        options.runner = 'StdioPrinter'

    files = setFiles(args[1:])
    if len(files) == 0 and not options.root:
        sys.stderr.write(parser.error("No input files found"))

    return files

def printVersion():
    '''Print CxxTest version and exit'''
    sys.stdout.write( "This is CxxTest version %s.\n" % __release__.__version__ )
    sys.exit(0)

def setFiles(patterns ):
    '''Set input files specified on command line'''
    files = expandWildcards( patterns )
    return files

def expandWildcards( patterns ):
    '''Expand all wildcards in an array (glob)'''
    fileNames = []
    for pathName in patterns:
        patternFiles = glob.glob( pathName )
        for fileName in patternFiles:
            fileNames.append( fixBackslashes( fileName ) )
    return fileNames

def fixBackslashes( fileName ):
    '''Convert backslashes to slashes in file name'''
    return re.sub( r'\\', '/', fileName, 0 )

def writeOutput():
    '''Create output file'''
    if options.templateFileName:
        writeTemplateOutput()
    else:
        writeSimpleOutput()

def writeSimpleOutput():
    '''Create output not based on template'''
    output = startOutputFile()
    writePreamble( output )
    if options.root or not options.part:
        writeMain( output )
        # NOTE(review): nesting of the _init flag under the root/part check
        # reconstructed from context -- confirm against upstream cxxtestgen.
        if len(suites) > 0:
            output.write("bool "+suites[0]['object']+"_init = false;\n")
    writeWorld( output )
    output.close()

include_re = re.compile( r"\s*\#\s*include\s+<cxxtest/" )
preamble_re = re.compile( r"^\s*<CxxTest\s+preamble>\s*$" )
world_re = re.compile( r"^\s*<CxxTest\s+world>\s*$" )

def writeTemplateOutput():
    '''Create output based on template file'''
    template = open(options.templateFileName)
    output = startOutputFile()
    while 1:
        line = template.readline()
        if not line:
            break;
        if include_re.search( line ):
            writePreamble( output )
            output.write( line )
        elif preamble_re.search( line ):
            writePreamble( output )
        elif world_re.search( line ):
            if len(suites) > 0:
                output.write("bool "+suites[0]['object']+"_init = false;\n")
            writeWorld( output )
        else:
            # Ordinary template lines are copied through verbatim.
            output.write( line )
    template.close()
    output.close()

def startOutputFile():
    '''Create output file and write header'''
    if options.outputFileName is not None:
        output = open( options.outputFileName, 'w' )
    else:
        output = sys.stdout
    output.write( "/* Generated file, do not edit */\n\n" )
    return output

def writePreamble( output ):
    '''Write the CxxTest header (#includes and #defines)'''
    global wrotePreamble
    if wrotePreamble:
        return
    output.write( "#ifndef CXXTEST_RUNNING\n" )
    output.write( "#define CXXTEST_RUNNING\n" )
    output.write( "#endif\n" )
    output.write( "\n" )
    if options.xunit_printer:
        output.write( "#include <fstream>\n" )
    if options.haveStandardLibrary:
        output.write( "#define _CXXTEST_HAVE_STD\n" )
    if options.haveExceptionHandling:
        output.write( "#define _CXXTEST_HAVE_EH\n" )
    if options.abortOnFail:
        output.write( "#define _CXXTEST_ABORT_TEST_ON_FAIL\n" )
    if options.longlong:
        output.write( "#define _CXXTEST_LONGLONG %s\n" % options.longlong )
    #if options.factor:
        #output.write( "#define _CXXTEST_FACTOR\n" )
    for header in options.headers:
        output.write( "#include \"%s\"\n" % header )
    output.write( "#include <cxxtest/TestListener.h>\n" )
    output.write( "#include <cxxtest/TestTracker.h>\n" )
    output.write( "#include <cxxtest/TestRunner.h>\n" )
    output.write( "#include <cxxtest/RealDescriptions.h>\n" )
    output.write( "#include <cxxtest/TestMain.h>\n" )
    if options.runner:
        output.write( "#include <cxxtest/%s.h>\n" % options.runner )
    if options.gui:
        output.write( "#include <cxxtest/%s.h>\n" % options.gui )
    output.write( "\n" )
    wrotePreamble = 1

def writeMain( output ):
    '''Write the main() function for the test runner'''
    if not (options.gui or options.runner):
        return
    output.write( 'int %s( int argc, char *argv[] ) {\n' % options.main )
    output.write( ' int status;\n' )
    if options.noStaticInit:
        output.write( ' CxxTest::initialize();\n' )
    if options.gui:
        tester_t = "CxxTest::GuiTuiRunner<CxxTest::%s, CxxTest::%s> " % (options.gui, options.runner)
    else:
        tester_t = "CxxTest::%s" % (options.runner)
    if options.xunit_printer:
        output.write( ' std::ofstream ofstr("%s");\n' % options.xunit_file )
        output.write( ' %s tmp(ofstr);\n' % tester_t )
        output.write( ' CxxTest::RealWorldDescription::_worldName = "%s";\n' % options.world )
    else:
        output.write( ' %s tmp;\n' % tester_t )
    output.write( ' status = CxxTest::Main<%s>( tmp, argc, argv );\n' % tester_t )
    output.write( ' return status;\n')
    output.write( '}\n' )

def writeWorld( output ):
    '''Write the world definitions'''
    global wroteWorld
    if wroteWorld:
        return
    writePreamble( output )
    writeSuites( output )
    if options.root or not options.part:
        writeRoot( output )
        writeWorldDescr( output )
    if options.noStaticInit:
        writeInitialize( output )
    wroteWorld = 1

def writeSuites(output):
    '''Write all TestDescriptions and SuiteDescriptions'''
    for suite in suites:
        writeInclude( output, suite['file'] )
        if isGenerated(suite):
            generateSuite( output, suite )
        if isDynamic(suite):
            writeSuitePointer( output, suite )
        else:
            writeSuiteObject( output, suite )
        writeTestList( output, suite )
        writeSuiteDescription( output, suite )
        writeTestDescriptions( output, suite )

def isGenerated(suite):
    '''Checks whether a suite class should be created'''
    return suite['generated']

def isDynamic(suite):
    '''Checks whether a suite is dynamic'''
    return 'create' in suite

def writeInclude(output, file):
    '''Add #include "file" statement'''
    global lastIncluded
    # Avoid emitting the same include twice in a row.
    if file == lastIncluded:
        return
    output.writelines( [ '#include "', file, '"\n\n' ] )
    lastIncluded = file

def generateSuite( output, suite ):
    '''Write a suite declared with CXXTEST_SUITE()'''
    output.write( 'class %s : public CxxTest::TestSuite {\n' % suite['name'] )
    output.write( 'public:\n' )
    for line in suite['lines']:
        output.write(line)
    output.write( '};\n\n' )

def writeSuitePointer( output, suite ):
    '''Create static suite pointer object for dynamic suites'''
    if options.noStaticInit:
        output.write( 'static %s *%s;\n\n' % (suite['name'], suite['object']) )
    else:
        output.write( 'static %s *%s = 0;\n\n' % (suite['name'], suite['object']) )

def writeSuiteObject( output, suite ):
    '''Create static suite object for non-dynamic suites'''
    output.writelines( [ "static ", suite['name'], " ", suite['object'], ";\n\n" ] )

def writeTestList( output, suite ):
    '''Write the head of the test linked list for a suite'''
    if options.noStaticInit:
        output.write( 'static CxxTest::List %s;\n' % suite['tlist'] )
    else:
        output.write( 'static CxxTest::List %s = { 0, 0 };\n' % suite['tlist'] )

def writeWorldDescr( output ):
    '''Write the static name of the world name'''
    if options.noStaticInit:
        output.write( 'const char* CxxTest::RealWorldDescription::_worldName;\n' )
    else:
        output.write( 'const char* CxxTest::RealWorldDescription::_worldName = "cxxtest";\n' )

def writeTestDescriptions( output, suite ):
    '''Write all test descriptions for a suite'''
    for test in suite['tests']:
        writeTestDescription( output, suite, test )

def writeTestDescription( output, suite, test ):
    '''Write test description object'''
    output.write( 'static class %s : public CxxTest::RealTestDescription {\n' % test['class'] )
    output.write( 'public:\n' )
    if not options.noStaticInit:
        output.write( ' %s() : CxxTest::RealTestDescription( %s, %s, %s, "%s" ) {}\n' % (test['class'], suite['tlist'], suite['dobject'], test['line'], test['name']) )
    output.write( ' void runTest() { %s }\n' % runBody( suite, test ) )
    output.write( '} %s;\n\n' %
test['object'] ) def runBody( suite, test ): '''Body of TestDescription::run()''' if isDynamic(suite): return dynamicRun( suite, test ) else: return staticRun( suite, test ) def dynamicRun( suite, test ): '''Body of TestDescription::run() for test in a dynamic suite''' return 'if ( ' + suite['object'] + ' ) ' + suite['object'] + '->' + test['name'] + '();' def staticRun( suite, test ): '''Body of TestDescription::run() for test in a non-dynamic suite''' return suite['object'] + '.' + test['name'] + '();' def writeSuiteDescription( output, suite ): '''Write SuiteDescription object''' if isDynamic( suite ): writeDynamicDescription( output, suite ) else: writeStaticDescription( output, suite ) def writeDynamicDescription( output, suite ): '''Write SuiteDescription for a dynamic suite''' output.write( 'CxxTest::DynamicSuiteDescription<%s> %s' % (suite['name'], suite['dobject']) ) if not options.noStaticInit: output.write( '( %s, %s, "%s", %s, %s, %s, %s )' % (suite['cfile'], suite['line'], suite['name'], suite['tlist'], suite['object'], suite['create'], suite['destroy']) ) output.write( ';\n\n' ) def writeStaticDescription( output, suite ): '''Write SuiteDescription for a static suite''' output.write( 'CxxTest::StaticSuiteDescription %s' % suite['dobject'] ) if not options.noStaticInit: output.write( '( %s, %s, "%s", %s, %s )' % (suite['cfile'], suite['line'], suite['name'], suite['object'], suite['tlist']) ) output.write( ';\n\n' ) def writeRoot(output): '''Write static members of CxxTest classes''' output.write( '#include <cxxtest/Root.cpp>\n' ) def writeInitialize(output): '''Write CxxTest::initialize(), which replaces static initialization''' output.write( 'namespace CxxTest {\n' ) output.write( ' void initialize()\n' ) output.write( ' {\n' ) for suite in suites: output.write( ' %s.initialize();\n' % suite['tlist'] ) if isDynamic(suite): output.write( ' %s = 0;\n' % suite['object'] ) output.write( ' %s.initialize( %s, %s, "%s", %s, %s, %s, %s );\n' % 
(suite['dobject'], suite['cfile'], suite['line'], suite['name'], suite['tlist'], suite['object'], suite['create'], suite['destroy']) ) else: output.write( ' %s.initialize( %s, %s, "%s", %s, %s );\n' % (suite['dobject'], suite['cfile'], suite['line'], suite['name'], suite['object'], suite['tlist']) ) for test in suite['tests']: output.write( ' %s.initialize( %s, %s, %s, "%s" );\n' % (test['object'], suite['tlist'], suite['dobject'], test['line'], test['name']) ) output.write( ' }\n' ) output.write( '}\n' ) ================================================ FILE: cxxtest/python/python3/cxxtest/__init__.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- """cxxtest: A Python package that supports the CxxTest test framework for C/C++. .. _CxxTest: http://cxxtest.tigris.org/ CxxTest is a unit testing framework for C++ that is similar in spirit to JUnit, CppUnit, and xUnit. CxxTest is easy to use because it does not require precompiling a CxxTest testing library, it employs no advanced features of C++ (e.g. RTTI) and it supports a very flexible form of test discovery. The cxxtest Python package includes capabilities for parsing C/C++ source files and generating CxxTest drivers. """ from cxxtest.__release__ import __version__, __date__ __date__ __version__ __maintainer__ = "William E. 
Hart" __maintainer_email__ = "whart222@gmail.com" __license__ = "LGPL" __url__ = "http://cxxtest.tigris.org/" from cxxtest.cxxtestgen import * ================================================ FILE: cxxtest/python/python3/cxxtest/__release__.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- """ Release Information for cxxtest """ __version__ = '4.0.2' __date__ = "2012-01-02" ================================================ FILE: cxxtest/python/python3/cxxtest/cxx_parser.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- # vim: fileencoding=utf-8 # # This is a PLY parser for the entire ANSI C++ grammar. This grammar was # adapted from the FOG grammar developed by E. D. Willink. See # # http://www.computing.surrey.ac.uk/research/dsrg/fog/ # # for further details. # # The goal of this grammar is to extract information about class, function and # class method declarations, along with their associated scope. 
Thus, this
# grammar can be used to analyze classes in an inheritance hierarchy, and then
# enumerate the methods in a derived class.
#
# This grammar parses blocks of <>, (), [] and {} in a generic manner.  Thus,
# There are several capabilities that this grammar does not support:
#
# 1. Ambiguous template specification.  This grammar cannot parse template
#    specifications that do not have paired <>'s in their declaration.  In
#    particular, ambiguous declarations like
#
#           foo<A, c<3 >();
#
#    cannot be correctly parsed.
#
# 2. Template class specialization.  Although the goal of this grammar is to
#    extract class information, specialization of templated classes is
#    not supported.  When a template class definition is parsed, its
#    declaration is archived without information about the template
#    parameters.  Class specializations will be stored separately, and
#    thus they can be processed after the fact.  However, this grammar
#    does not attempt to correctly process properties of class inheritance
#    when template class specialization is employed.
#
#
# TODO: document usage of this file
#

import os
import ply.lex as lex
import ply.yacc as yacc
import re

# OrderedDict entered the stdlib in 2.7/3.1; older interpreters fall back
# to the third-party 'ordereddict' backport.
try:
    from collections import OrderedDict
except ImportError:
    from ordereddict import OrderedDict

# Module-level parser state, shared between the lexer callbacks and the
# grammar action functions defined below.
lexer = None                # active PLY lexer object; consulted for line numbers
scope_lineno = 0            # line number recorded when a scoped_id is reduced
identifier_lineno = {}      # identifier name -> line number where it was seen
_parse_info=None            # CppInfo instance that accumulates parse results
_parsedata=None             # raw source text handed to the parser
noExceptionLogic = True     # set False as soon as a try/catch block is parsed

def ply_init(data):
    '''Record the source text that is about to be parsed.'''
    global _parsedata
    _parsedata=data


class Scope(object):
    '''A single C++ scope (namespace, class, ...) found during parsing,
    together with the functions declared directly inside it.'''

    def __init__(self,name,abs_name,scope_t,base_classes,lineno):
        # list of (function name, line number) pairs declared in this scope
        self.function=[]
        self.name=name                  # unqualified scope name
        self.scope_t=scope_t            # scope kind, e.g. "namespace" or "class"
        self.sub_scopes=[]              # nested Scope objects
        self.base_classes=base_classes  # base-class names (when scope_t == "class")
        self.abs_name=abs_name          # fully qualified, "::"-separated name
        self.lineno=lineno              # line where this scope was opened
    def insert(self,scope):
        '''Attach a nested scope to this one.'''
        self.sub_scopes.append(scope)

class CppInfo(object):
    '''Collects scopes and (filtered) function declarations discovered while
    parsing a C++ translation unit; used by cxxtestgen to find test methods.'''

    def __init__(self, filter=None):
        self.verbose=0
        # Only functions whose names match this regex are recorded; the
        # default matches CxxTest conventions (test*, createSuite, destroySuite).
        if filter is None:
            self.filter=re.compile("[Tt][Ee][Ss][Tt]|createSuite|destroySuite")
        else:
            self.filter=filter
        self.scopes=[""]                # stack of absolute scope names; "" is the global scope
        self.index=OrderedDict()        # absolute scope name -> Scope object
        self.index[""]=Scope("","::","namespace",[],1)
        self.function=[]

    # NOTE(review): mutable default argument `base_classes=[]` is shared across
    # calls; safe only as long as callers never mutate it -- verify.
    def push_scope(self,ns,scope_t,base_classes=[]):
        '''Enter a new nested scope named `ns` of kind `scope_t`.'''
        name = self.scopes[-1]+"::"+ns
        if self.verbose>=2:
            print("-- Starting "+scope_t+" "+name)
        self.scopes.append(name)
        self.index[name] = Scope(ns,name,scope_t,base_classes,scope_lineno-1)

    def pop_scope(self):
        '''Leave the current scope and return its absolute name.'''
        scope = self.scopes.pop()
        if self.verbose>=2:
            print("-- Stopping "+scope)
        return scope

    def add_function(self, fn):
        '''Record function `fn` in the current scope if its name passes the filter.'''
        fn = str(fn)
        if self.filter.search(fn):
            # Fall back to the lexer's current line when the identifier's own
            # line number was not captured.
            self.index[self.scopes[-1]].function.append((fn, identifier_lineno.get(fn,lexer.lineno-1)))
            tmp = self.scopes[-1]+"::"+fn
            if self.verbose==2:
                print("-- Function declaration "+fn+"  "+tmp)
            elif self.verbose==1:
                print("-- Function declaration "+tmp)

    def get_functions(self,name,quiet=False):
        '''Return the functions of scope `name`, including those inherited
        from resolvable base classes.'''
        if name == "::":
            name = ""
        scope = self.index[name]
        fns=scope.function
        for key in scope.base_classes:
            cname = self.find_class(key,scope)
            if cname is None:
                if not quiet:
                    print("Defined classes: ",list(self.index.keys()))
                    print("WARNING: Unknown class "+key)
            else:
                fns += self.get_functions(cname,quiet)
        return fns

    def find_class(self,name,scope):
        '''Resolve class name `name` relative to `scope`; returns the absolute
        name under which it is indexed, or None if unknown.'''
        if ':' in name:
            # Already (partially) qualified: only an exact index hit counts.
            if name in self.index:
                return name
            else:
                return None
        # Try as a sibling of `scope`, then as a global-scope name.
        tmp = scope.abs_name.split(':')
        name1 = ":".join(tmp[:-1] + [name])
        if name1 in self.index:
            return name1
        name2 = "::"+name
        if name2 in self.index:
            return name2
        return None

    def __repr__(self):
        return str(self)

    def is_baseclass(self,cls,base):
        '''Returns true if base is a base-class of cls'''
        if cls in self.index:
            bases = self.index[cls]
        elif "::"+cls in self.index:
            bases = self.index["::"+cls]
        else:
            return False
            #raise IOError, "Unknown class "+cls
        if base in bases.base_classes:
            return True
        # Recurse through indirect bases.
        for name in bases.base_classes:
            if self.is_baseclass(name,base):
                return True
        return False

    def __str__(self):
        ans=""
        keys = list(self.index.keys())
        keys.sort()
        for key in keys:
            scope = self.index[key]
            ans += scope.scope_t+" "+scope.abs_name+"\n"
            if scope.scope_t == "class":
                ans += "  Base Classes: "+str(scope.base_classes)+"\n"
                for fn in self.get_functions(scope.abs_name):
                    ans += "  "+fn+"\n"
            else:
                for fn in scope.function:
                    ans += "  "+fn+"\n"
        return ans


def flatten(x):
    """Flatten nested list"""
    # NOTE(review): `strtypes = str` never raises, so the except branch is
    # dead; inputs here are parse fragments (strings/None), never bytes.
    try:
        strtypes = str
    except: # for python3 etc
        strtypes = (str, bytes)
    result = []
    for el in x:
        if hasattr(el, "__iter__") and not isinstance(el, strtypes):
            result.extend(flatten(el))
        else:
            result.append(el)
    return result

#
# The lexer (and/or a preprocessor) is expected to identify the following
#
# Punctuation:
#
#
literals = "+-*/%^&|~!<>=:()?.\'\"\\@$;,"

#
# Keywords and multi-character operators are mapped to named tokens; the
# token names below are referenced by the grammar productions further down.
#
reserved = {
    'private' : 'PRIVATE',
    'protected' : 'PROTECTED',
    'public' : 'PUBLIC',

    'bool' : 'BOOL',
    'char' : 'CHAR',
    'double' : 'DOUBLE',
    'float' : 'FLOAT',
    'int' : 'INT',
    'long' : 'LONG',
    'short' : 'SHORT',
    'signed' : 'SIGNED',
    'unsigned' : 'UNSIGNED',
    'void' : 'VOID',
    'wchar_t' : 'WCHAR_T',

    'class' : 'CLASS',
    'enum' : 'ENUM',
    'namespace' : 'NAMESPACE',
    'struct' : 'STRUCT',
    'typename' : 'TYPENAME',
    'union' : 'UNION',

    'const' : 'CONST',
    'volatile' : 'VOLATILE',

    'auto' : 'AUTO',
    'explicit' : 'EXPLICIT',
    'export' : 'EXPORT',
    'extern' : 'EXTERN',
    '__extension__' : 'EXTENSION',
    'friend' : 'FRIEND',
    'inline' : 'INLINE',
    'mutable' : 'MUTABLE',
    'register' : 'REGISTER',
    'static' : 'STATIC',
    'template' : 'TEMPLATE',
    'typedef' : 'TYPEDEF',
    'using' : 'USING',
    'virtual' : 'VIRTUAL',

    'asm' : 'ASM',
    'break' : 'BREAK',
    'case' : 'CASE',
    'catch' : 'CATCH',
    'const_cast' : 'CONST_CAST',
    'continue' : 'CONTINUE',
    'default' : 'DEFAULT',
    'delete' : 'DELETE',
    'do' : 'DO',
    'dynamic_cast' : 'DYNAMIC_CAST',
    'else' : 'ELSE',
    'false' : 'FALSE',
    'for' : 'FOR',
    'goto' : 'GOTO',
    'if' : 'IF',
    'new' : 'NEW',
    'operator' : 'OPERATOR',
    'reinterpret_cast' : 'REINTERPRET_CAST',
    'return' : 'RETURN',
    'sizeof' : 'SIZEOF',
    'static_cast' : 'STATIC_CAST',
    'switch' : 'SWITCH',
    'this' : 'THIS',
    'throw' : 'THROW',
    'true' : 'TRUE',
    'try' : 'TRY',
    'typeid' : 'TYPEID',
    'while' : 'WHILE',
    '"C"' : 'CLiteral',
    '"C++"' : 'CppLiteral',

    '__attribute__' : 'ATTRIBUTE',
    '__cdecl__' : 'CDECL',
    '__typeof' : 'uTYPEOF',
    'typeof' : 'TYPEOF',

    'CXXTEST_STD' : 'CXXTEST_STD'
}

tokens = [
    "CharacterLiteral",
    "FloatingLiteral",
    "Identifier",
    "IntegerLiteral",
    "StringLiteral",
    "RBRACE",
    "LBRACE",
    "RBRACKET",
    "LBRACKET",
    "ARROW",
    "ARROW_STAR",
    "DEC",
    "EQ",
    "GE",
    "INC",
    "LE",
    "LOG_AND",
    "LOG_OR",
    "NE",
    "SHL",
    "SHR",
    "ASS_ADD",
    "ASS_AND",
    "ASS_DIV",
    "ASS_MOD",
    "ASS_MUL",
    "ASS_OR",
    "ASS_SHL",
    "ASS_SHR",
    "ASS_SUB",
    "ASS_XOR",
    "DOT_STAR",
    "ELLIPSIS",
    "SCOPE",
] + list(reserved.values())

t_ignore = " \t\r"

# Simple multi-character operator tokens; the alternates (<% %> <: :>) are
# the C++ digraph spellings of braces/brackets.
t_LBRACE = r"(\{)|(<%)"
t_RBRACE = r"(\})|(%>)"
t_LBRACKET = r"(\[)|(<:)"
t_RBRACKET = r"(\])|(:>)"
t_ARROW = r"->"
t_ARROW_STAR = r"->\*"
t_DEC = r"--"
t_EQ = r"=="
t_GE = r">="
t_INC = r"\+\+"
t_LE = r"<="
t_LOG_AND = r"&&"
t_LOG_OR = r"\|\|"
t_NE = r"!="
t_SHL = r"<<"
t_SHR = r">>"
t_ASS_ADD = r"\+="
t_ASS_AND = r"&="
t_ASS_DIV = r"/="
t_ASS_MOD = r"%="
t_ASS_MUL = r"\*="
t_ASS_OR = r"\|="
t_ASS_SHL = r"<<="
t_ASS_SHR = r">>="
t_ASS_SUB = r"-="
t_ASS_XOR = r"^="
t_DOT_STAR = r"\.\*"
t_ELLIPSIS = r"\.\.\."
t_SCOPE = r"::" # Discard comments def t_COMMENT(t): r'(/\*(.|\n)*?\*/)|(//.*?\n)|(\#.*?\n)' t.lexer.lineno += t.value.count("\n") t_IntegerLiteral = r'(0x[0-9A-F]+)|([0-9]+(L){0,1})' t_FloatingLiteral = r"[0-9]+[eE\.\+-]+[eE\.\+\-0-9]+" t_CharacterLiteral = r'\'([^\'\\]|\\.)*\'' #t_StringLiteral = r'"([^"\\]|\\.)*"' def t_StringLiteral(t): r'"([^"\\]|\\.)*"' t.type = reserved.get(t.value,'StringLiteral') return t def t_Identifier(t): r"[a-zA-Z_][a-zA-Z_0-9\.]*" t.type = reserved.get(t.value,'Identifier') return t def t_error(t): print("Illegal character '%s'" % t.value[0]) #raise IOError, "Parse error" #t.lexer.skip() def t_newline(t): r'[\n]+' t.lexer.lineno += len(t.value) precedence = ( ( 'right', 'SHIFT_THERE', 'REDUCE_HERE_MOSTLY', 'SCOPE'), ( 'nonassoc', 'ELSE', 'INC', 'DEC', '+', '-', '*', '&', 'LBRACKET', 'LBRACE', '<', ':', ')') ) start = 'translation_unit' # # The %prec resolves the 14.2-3 ambiguity: # Identifier '<' is forced to go through the is-it-a-template-name test # All names absorb TEMPLATE with the name, so that no template_test is # performed for them. This requires all potential declarations within an # expression to perpetuate this policy and thereby guarantee the ultimate # coverage of explicit_instantiation. # # The %prec also resolves a conflict in identifier : which is forced to be a # shift of a label for a labeled-statement rather than a reduction for the # name of a bit-field or generalised constructor. This is pretty dubious # syntactically but correct for all semantic possibilities. The shift is # only activated when the ambiguity exists at the start of a statement. # In this context a bit-field declaration or constructor definition are not # allowed. 
# def p_identifier(p): '''identifier : Identifier | CXXTEST_STD '(' Identifier ')' ''' if p[1][0] in ('t','T','c','d'): identifier_lineno[p[1]] = p.lineno(1) p[0] = p[1] def p_id(p): '''id : identifier %prec SHIFT_THERE | template_decl | TEMPLATE id ''' p[0] = get_rest(p) def p_global_scope(p): '''global_scope : SCOPE ''' p[0] = get_rest(p) def p_id_scope(p): '''id_scope : id SCOPE''' p[0] = get_rest(p) def p_id_scope_seq(p): '''id_scope_seq : id_scope | id_scope id_scope_seq ''' p[0] = get_rest(p) # # A :: B :: C; is ambiguous How much is type and how much name ? # The %prec maximises the (type) length which is the 7.1-2 semantic constraint. # def p_nested_id(p): '''nested_id : id %prec SHIFT_THERE | id_scope nested_id ''' p[0] = get_rest(p) def p_scoped_id(p): '''scoped_id : nested_id | global_scope nested_id | id_scope_seq | global_scope id_scope_seq ''' global scope_lineno scope_lineno = lexer.lineno data = flatten(get_rest(p)) if data[0] != None: p[0] = "".join(data) # # destructor_id has to be held back to avoid a conflict with a one's # complement as per 5.3.1-9, It gets put back only when scoped or in a # declarator_id, which is only used as an explicit member name. # Declarations of an unscoped destructor are always parsed as a one's # complement. # def p_destructor_id(p): '''destructor_id : '~' id | TEMPLATE destructor_id ''' p[0]=get_rest(p) #def p_template_id(p): # '''template_id : empty # | TEMPLATE # ''' # pass def p_template_decl(p): '''template_decl : identifier '<' nonlgt_seq_opt '>' ''' # # WEH: should we include the lt/gt symbols to indicate that this is a # template class? How is that going to be used later??? 
# #p[0] = [p[1] ,"<",">"] p[0] = p[1] def p_special_function_id(p): '''special_function_id : conversion_function_id | operator_function_id | TEMPLATE special_function_id ''' p[0]=get_rest(p) def p_nested_special_function_id(p): '''nested_special_function_id : special_function_id | id_scope destructor_id | id_scope nested_special_function_id ''' p[0]=get_rest(p) def p_scoped_special_function_id(p): '''scoped_special_function_id : nested_special_function_id | global_scope nested_special_function_id ''' p[0]=get_rest(p) # declarator-id is all names in all scopes, except reserved words def p_declarator_id(p): '''declarator_id : scoped_id | scoped_special_function_id | destructor_id ''' p[0]=p[1] # # The standard defines pseudo-destructors in terms of type-name, which is # class/enum/typedef, of which class-name is covered by a normal destructor. # pseudo-destructors are supposed to support ~int() in templates, so the # grammar here covers built-in names. Other names are covered by the lack # of identifier/type discrimination. 
#
def p_built_in_type_id(p):
    '''built_in_type_id : built_in_type_specifier
                        | built_in_type_id built_in_type_specifier
    '''
    pass

def p_pseudo_destructor_id(p):
    '''pseudo_destructor_id : built_in_type_id SCOPE '~' built_in_type_id
                            | '~' built_in_type_id
                            | TEMPLATE pseudo_destructor_id
    '''
    pass

def p_nested_pseudo_destructor_id(p):
    '''nested_pseudo_destructor_id : pseudo_destructor_id
                                   | id_scope nested_pseudo_destructor_id
    '''
    pass

def p_scoped_pseudo_destructor_id(p):
    '''scoped_pseudo_destructor_id : nested_pseudo_destructor_id
                                   | global_scope scoped_pseudo_destructor_id
    '''
    pass

#-------------------------------------------------------------------------------
# A.2 Lexical conventions
#-------------------------------------------------------------------------------
#
def p_literal(p):
    '''literal : IntegerLiteral
               | CharacterLiteral
               | FloatingLiteral
               | StringLiteral
               | TRUE
               | FALSE
    '''
    pass

#-------------------------------------------------------------------------------
# A.3 Basic concepts
#-------------------------------------------------------------------------------
def p_translation_unit(p):
    '''translation_unit : declaration_seq_opt
    '''
    pass

#-------------------------------------------------------------------------------
# A.4 Expressions
#-------------------------------------------------------------------------------
#
# primary_expression covers an arbitrary sequence of all names with the
# exception of an unscoped destructor, which is parsed as its unary expression
# which is the correct disambiguation (when ambiguous).  This eliminates the
# traditional A(B) meaning A B ambiguity, since we never have to tack an A
# onto the front of something that might start with (.  The name length got
# maximised ab initio.  The downside is that semantic interpretation must split
# the names up again.
#
# Unification of the declaration and expression syntax means that unary and
# binary pointer declarator operators:
#     int * * name
# are parsed as binary and unary arithmetic operators (int) * (*name).  Since
# type information is not used
# ambiguities resulting from a cast
#     (cast)*(value)
# are resolved to favour the binary rather than the cast unary to ease AST
# clean-up.  The cast-call ambiguity must be resolved to the cast to ensure
# that (a)(b)c can be parsed.
#
# The problem of the functional cast ambiguity
#     name(arg)
# as call or declaration is avoided by maximising the name within the parsing
# kernel.  So primary_id_expression picks up
#     extern long int const var = 5;
# as an assignment to the syntax parsed as "extern long int const var".  The
# presence of two names is parsed so that "extern long into const" is
# distinguished from "var" considerably simplifying subsequent
# semantic resolution.
#
# The generalised name is a concatenation of potential type-names (scoped
# identifiers or built-in sequences) plus optionally one of the special names
# such as an operator-function-id, conversion-function-id or destructor as the
# final name.
#
def get_rest(p):
    '''Return the production's right-hand-side values as a list.'''
    return [p[i] for i in range(1, len(p))]

def p_primary_expression(p):
    '''primary_expression : literal
                          | THIS
                          | suffix_decl_specified_ids
                          | abstract_expression %prec REDUCE_HERE_MOSTLY
    '''
    p[0] = get_rest(p)

#
# Abstract-expression covers the () and [] of abstract-declarators.
#
def p_abstract_expression(p):
    '''abstract_expression : parenthesis_clause
                           | LBRACKET bexpression_opt RBRACKET
                           | TEMPLATE abstract_expression
    '''
    pass

def p_postfix_expression(p):
    '''postfix_expression : primary_expression
                          | postfix_expression parenthesis_clause
                          | postfix_expression LBRACKET bexpression_opt RBRACKET
                          | postfix_expression LBRACKET bexpression_opt RBRACKET attributes
                          | postfix_expression '.' declarator_id
                          | postfix_expression '.' scoped_pseudo_destructor_id
                          | postfix_expression ARROW declarator_id
                          | postfix_expression ARROW scoped_pseudo_destructor_id
                          | postfix_expression INC
                          | postfix_expression DEC
                          | DYNAMIC_CAST '<' nonlgt_seq_opt '>' '(' expression ')'
                          | STATIC_CAST '<' nonlgt_seq_opt '>' '(' expression ')'
                          | REINTERPRET_CAST '<' nonlgt_seq_opt '>' '(' expression ')'
                          | CONST_CAST '<' nonlgt_seq_opt '>' '(' expression ')'
                          | TYPEID parameters_clause
    '''
    #print "HERE",str(p[1])
    p[0] = get_rest(p)

def p_bexpression_opt(p):
    '''bexpression_opt : empty
                       | bexpression
    '''
    pass

def p_bexpression(p):
    '''bexpression : nonbracket_seq
                   | nonbracket_seq bexpression_seq bexpression_clause nonbracket_seq_opt
                   | bexpression_seq bexpression_clause nonbracket_seq_opt
    '''
    pass

def p_bexpression_seq(p):
    '''bexpression_seq : empty
                       | bexpression_seq bexpression_clause nonbracket_seq_opt
    '''
    pass

def p_bexpression_clause(p):
    '''bexpression_clause : LBRACKET bexpression_opt RBRACKET
    '''
    pass

def p_expression_list_opt(p):
    '''expression_list_opt : empty
                           | expression_list
    '''
    pass

def p_expression_list(p):
    '''expression_list : assignment_expression
                       | expression_list ',' assignment_expression
    '''
    pass

def p_unary_expression(p):
    '''unary_expression : postfix_expression
                        | INC cast_expression
                        | DEC cast_expression
                        | ptr_operator cast_expression
                        | suffix_decl_specified_scope star_ptr_operator cast_expression
                        | '+' cast_expression
                        | '-' cast_expression
                        | '!' cast_expression
                        | '~' cast_expression
                        | SIZEOF unary_expression
                        | new_expression
                        | global_scope new_expression
                        | delete_expression
                        | global_scope delete_expression
    '''
    p[0] = get_rest(p)

def p_delete_expression(p):
    '''delete_expression : DELETE cast_expression
    '''
    pass

def p_new_expression(p):
    '''new_expression : NEW new_type_id new_initializer_opt
                      | NEW parameters_clause new_type_id new_initializer_opt
                      | NEW parameters_clause
                      | NEW parameters_clause parameters_clause new_initializer_opt
    '''
    pass

def p_new_type_id(p):
    '''new_type_id : type_specifier ptr_operator_seq_opt
                   | type_specifier new_declarator
                   | type_specifier new_type_id
    '''
    pass

def p_new_declarator(p):
    '''new_declarator : ptr_operator new_declarator
                      | direct_new_declarator
    '''
    pass

def p_direct_new_declarator(p):
    '''direct_new_declarator : LBRACKET bexpression_opt RBRACKET
                             | direct_new_declarator LBRACKET bexpression RBRACKET
    '''
    pass

def p_new_initializer_opt(p):
    '''new_initializer_opt : empty
                           | '(' expression_list_opt ')'
    '''
    pass

#
# cast-expression is generalised to support a [] as well as a () prefix. This covers the omission of
# DELETE[] which when followed by a parenthesised expression was ambiguous. It also covers the gcc
# indexed array initialisation for free.
#
def p_cast_expression(p):
    '''cast_expression : unary_expression
                       | abstract_expression cast_expression
    '''
    p[0] = get_rest(p)

def p_pm_expression(p):
    '''pm_expression : cast_expression
                     | pm_expression DOT_STAR cast_expression
                     | pm_expression ARROW_STAR cast_expression
    '''
    p[0] = get_rest(p)

def p_multiplicative_expression(p):
    '''multiplicative_expression : pm_expression
                                 | multiplicative_expression star_ptr_operator pm_expression
                                 | multiplicative_expression '/' pm_expression
                                 | multiplicative_expression '%' pm_expression
    '''
    p[0] = get_rest(p)

def p_additive_expression(p):
    '''additive_expression : multiplicative_expression
                           | additive_expression '+' multiplicative_expression
                           | additive_expression '-' multiplicative_expression
    '''
    p[0] = get_rest(p)

def p_shift_expression(p):
    '''shift_expression : additive_expression
                        | shift_expression SHL additive_expression
                        | shift_expression SHR additive_expression
    '''
    p[0] = get_rest(p)

#    | relational_expression '<' shift_expression
#    | relational_expression '>' shift_expression
#    | relational_expression LE shift_expression
#    | relational_expression GE shift_expression
def p_relational_expression(p):
    '''relational_expression : shift_expression
    '''
    p[0] = get_rest(p)

def p_equality_expression(p):
    '''equality_expression : relational_expression
                           | equality_expression EQ relational_expression
                           | equality_expression NE relational_expression
    '''
    p[0] = get_rest(p)

def p_and_expression(p):
    '''and_expression : equality_expression
                      | and_expression '&' equality_expression
    '''
    p[0] = get_rest(p)

def p_exclusive_or_expression(p):
    '''exclusive_or_expression : and_expression
                               | exclusive_or_expression '^' and_expression
    '''
    p[0] = get_rest(p)

def p_inclusive_or_expression(p):
    '''inclusive_or_expression : exclusive_or_expression
                               | inclusive_or_expression '|' exclusive_or_expression
    '''
    p[0] = get_rest(p)

def p_logical_and_expression(p):
    '''logical_and_expression : inclusive_or_expression
                              | logical_and_expression LOG_AND inclusive_or_expression
    '''
    p[0] = get_rest(p)

def p_logical_or_expression(p):
    '''logical_or_expression : logical_and_expression
                             | logical_or_expression LOG_OR logical_and_expression
    '''
    p[0] = get_rest(p)

def p_conditional_expression(p):
    '''conditional_expression : logical_or_expression
                              | logical_or_expression '?' expression ':' assignment_expression
    '''
    p[0] = get_rest(p)


#
# assignment-expression is generalised to cover the simple assignment of a braced initializer in order to
# contribute to the coverage of parameter-declaration and init-declaration.
#
#    | logical_or_expression assignment_operator assignment_expression
def p_assignment_expression(p):
    '''assignment_expression : conditional_expression
                             | logical_or_expression assignment_operator nonsemicolon_seq
                             | logical_or_expression '=' braced_initializer
                             | throw_expression
    '''
    p[0]=get_rest(p)

def p_assignment_operator(p):
    '''assignment_operator : '='
                           | ASS_ADD
                           | ASS_AND
                           | ASS_DIV
                           | ASS_MOD
                           | ASS_MUL
                           | ASS_OR
                           | ASS_SHL
                           | ASS_SHR
                           | ASS_SUB
                           | ASS_XOR
    '''
    pass

#
# expression is widely used and usually single-element, so the reductions are arranged so that a
# single-element expression is returned as is.  Multi-element expressions are parsed as a list that
# may then behave polymorphically as an element or be compacted to an element.
#
def p_expression(p):
    '''expression : assignment_expression
                  | expression_list ',' assignment_expression
    '''
    p[0] = get_rest(p)

def p_constant_expression(p):
    '''constant_expression : conditional_expression
    '''
    pass

#---------------------------------------------------------------------------------------------------
# A.5 Statements
#---------------------------------------------------------------------------------------------------
# Parsing statements is easy once simple_declaration has been generalised to cover expression_statement.
#
#
# The use of extern here is a hack.  The 'extern "C" {}' block gets parsed
# as a function, so when nested 'extern "C"' declarations exist, they don't
# work because the block is viewed as a list of statements... :(
#
def p_statement(p):
    '''statement : compound_statement
                 | declaration_statement
                 | try_block
                 | labeled_statement
                 | selection_statement
                 | iteration_statement
                 | jump_statement
    '''
    pass

def p_compound_statement(p):
    '''compound_statement : LBRACE statement_seq_opt RBRACE
    '''
    pass

def p_statement_seq_opt(p):
    '''statement_seq_opt : empty
                         | statement_seq_opt statement
    '''
    pass

#
#  The dangling else conflict is resolved to the innermost if.
#
def p_selection_statement(p):
    '''selection_statement : IF '(' condition ')' statement    %prec SHIFT_THERE
                           | IF '(' condition ')' statement ELSE statement
                           | SWITCH '(' condition ')' statement
    '''
    pass

def p_condition_opt(p):
    '''condition_opt : empty
                     | condition
    '''
    pass

def p_condition(p):
    '''condition : nonparen_seq
                 | nonparen_seq condition_seq parameters_clause nonparen_seq_opt
                 | condition_seq parameters_clause nonparen_seq_opt
    '''
    pass

def p_condition_seq(p):
    '''condition_seq : empty
                     | condition_seq parameters_clause nonparen_seq_opt
    '''
    pass

def p_labeled_statement(p):
    '''labeled_statement : identifier ':' statement
                         | CASE constant_expression ':' statement
                         | DEFAULT ':' statement
    '''
    pass

def p_try_block(p):
    '''try_block : TRY compound_statement handler_seq
    '''
    # Seeing any try/catch disables the no-exception shortcut used by the
    # caller when emitting the test runner.
    global noExceptionLogic
    noExceptionLogic=False

def p_jump_statement(p):
    '''jump_statement : BREAK ';'
                      | CONTINUE ';'
                      | RETURN nonsemicolon_seq ';'
                      | GOTO identifier ';'
    '''
    pass

def p_iteration_statement(p):
    '''iteration_statement : WHILE '(' condition ')' statement
                           | DO statement WHILE '(' expression ')' ';'
                           | FOR '(' nonparen_seq_opt ')' statement
    '''
    pass

def p_declaration_statement(p):
    '''declaration_statement : block_declaration
    '''
    pass

#---------------------------------------------------------------------------------------------------
# A.6 Declarations
#---------------------------------------------------------------------------------------------------
def p_compound_declaration(p):
    '''compound_declaration : LBRACE declaration_seq_opt RBRACE
    '''
    pass

def p_declaration_seq_opt(p):
    '''declaration_seq_opt : empty
                           | declaration_seq_opt declaration
    '''
    pass

def p_declaration(p):
    '''declaration : block_declaration
                   | function_definition
                   | template_declaration
                   | explicit_specialization
                   | specialised_declaration
    '''
    pass

def p_specialised_declaration(p):
    '''specialised_declaration : linkage_specification
                               | namespace_definition
                               | TEMPLATE specialised_declaration
    '''
    pass

def p_block_declaration(p):
    '''block_declaration : simple_declaration
                         | specialised_block_declaration
    '''
    pass

def p_specialised_block_declaration(p):
    '''specialised_block_declaration : asm_definition
                                     | namespace_alias_definition
                                     | using_declaration
                                     | using_directive
                                     | TEMPLATE specialised_block_declaration
    '''
    pass

def p_simple_declaration(p):
    '''simple_declaration : ';'
                          | init_declaration ';'
                          | init_declarations ';'
                          | decl_specifier_prefix simple_declaration
    '''
    # This is where function declarations are actually harvested: when the
    # flattened declaration looks like "name (", report the name to the
    # shared CppInfo collector.
    global _parse_info
    if len(p) == 3:
        if p[2] == ";":
            decl = p[1]
        else:
            decl = p[2]
        if decl is not None:
            fp = flatten(decl)
            if len(fp) >= 2 and fp[0] is not None and fp[0]!="operator" and fp[1] == '(':
                p[0] = fp[0]
                _parse_info.add_function(fp[0])

#
#  A decl-specifier following a ptr_operator provokes a shift-reduce conflict for * const name which is resolved in favour of the pointer, and implemented by providing versions of decl-specifier guaranteed not to start with a cv_qualifier.  decl-specifiers are implemented type-centrically. That is the semantic constraint that there must be a type is exploited to impose structure, but actually eliminate very little syntax. built-in types are multi-name and so need a different policy.
#
#  non-type decl-specifiers are bound to the left-most type in a decl-specifier-seq, by parsing from the right and attaching suffixes to the right-hand type. Finally residual prefixes attach to the left.
#
def p_suffix_built_in_decl_specifier_raw(p):
    '''suffix_built_in_decl_specifier_raw : built_in_type_specifier
                                          | suffix_built_in_decl_specifier_raw built_in_type_specifier
                                          | suffix_built_in_decl_specifier_raw decl_specifier_suffix
    '''
    pass

def p_suffix_built_in_decl_specifier(p):
    '''suffix_built_in_decl_specifier : suffix_built_in_decl_specifier_raw
                                      | TEMPLATE suffix_built_in_decl_specifier
    '''
    pass

#    | id_scope_seq
#    | SCOPE id_scope_seq
def p_suffix_named_decl_specifier(p):
    '''suffix_named_decl_specifier : scoped_id
                                   | elaborate_type_specifier
                                   | suffix_named_decl_specifier decl_specifier_suffix
    '''
    p[0]=get_rest(p)

def p_suffix_named_decl_specifier_bi(p):
    '''suffix_named_decl_specifier_bi : suffix_named_decl_specifier
                                      | suffix_named_decl_specifier suffix_built_in_decl_specifier_raw
    '''
    p[0] = get_rest(p)
    #print "HERE",get_rest(p)

def p_suffix_named_decl_specifiers(p):
    '''suffix_named_decl_specifiers : suffix_named_decl_specifier_bi
                                    | suffix_named_decl_specifiers suffix_named_decl_specifier_bi
    '''
    p[0] = get_rest(p)

def p_suffix_named_decl_specifiers_sf(p):
    '''suffix_named_decl_specifiers_sf : scoped_special_function_id
                                       | suffix_named_decl_specifiers
                                       | suffix_named_decl_specifiers scoped_special_function_id
    '''
    #print "HERE",get_rest(p)
    p[0] = get_rest(p)

def p_suffix_decl_specified_ids(p):
    '''suffix_decl_specified_ids : suffix_built_in_decl_specifier
                                 | suffix_built_in_decl_specifier suffix_named_decl_specifiers_sf
                                 | suffix_named_decl_specifiers_sf
    '''
    # Keep only the named part when a built-in specifier precedes it.
    if len(p) == 3:
        p[0] = p[2]
    else:
        p[0] = p[1]

def p_suffix_decl_specified_scope(p):
    '''suffix_decl_specified_scope : suffix_named_decl_specifiers SCOPE
                                   | suffix_built_in_decl_specifier suffix_named_decl_specifiers SCOPE
                                   | suffix_built_in_decl_specifier SCOPE
    '''
    p[0] = get_rest(p)

def p_decl_specifier_affix(p):
    '''decl_specifier_affix : storage_class_specifier
                            | function_specifier
                            | FRIEND
                            | TYPEDEF
                            | cv_qualifier
    '''
    pass

def p_decl_specifier_suffix(p):
    '''decl_specifier_suffix : decl_specifier_affix
    '''
    pass

def p_decl_specifier_prefix(p):
    '''decl_specifier_prefix : decl_specifier_affix
                             | TEMPLATE decl_specifier_prefix
    '''
    pass

def p_storage_class_specifier(p):
    '''storage_class_specifier : REGISTER
                               | STATIC
                               | MUTABLE
                               | EXTERN %prec SHIFT_THERE
                               | EXTENSION
                               | AUTO
    '''
    pass

def p_function_specifier(p):
    '''function_specifier : EXPLICIT
                          | INLINE
                          | VIRTUAL
    '''
    pass

def p_type_specifier(p):
    '''type_specifier : simple_type_specifier
                      | elaborate_type_specifier
                      | cv_qualifier
    '''
    pass

def p_elaborate_type_specifier(p):
    '''elaborate_type_specifier : class_specifier
                                | enum_specifier
                                | elaborated_type_specifier
                                | TEMPLATE elaborate_type_specifier
    '''
    pass

def p_simple_type_specifier(p):
    '''simple_type_specifier : scoped_id
                             | scoped_id attributes
                             | built_in_type_specifier
    '''
    p[0] = p[1]

def p_built_in_type_specifier(p):
    '''built_in_type_specifier : Xbuilt_in_type_specifier
                               | Xbuilt_in_type_specifier attributes
    '''
    pass

def p_attributes(p):
    '''attributes : attribute
                  | attributes attribute
    '''
    pass

def p_attribute(p):
    '''attribute : ATTRIBUTE '(' parameters_clause ')'
    '''

def p_Xbuilt_in_type_specifier(p):
    '''Xbuilt_in_type_specifier : CHAR
                                | WCHAR_T
                                | BOOL
                                | SHORT
                                | INT
                                | LONG
                                | SIGNED
                                | UNSIGNED
                                | FLOAT
                                | DOUBLE
                                | VOID
                                | uTYPEOF parameters_clause
                                | TYPEOF parameters_clause
    '''
    pass

#
#  The over-general use of declaration_expression to cover decl-specifier-seq_opt declarator in a function-definition means that
#      class X { };
#  could be a function-definition or a class-specifier.
#      enum X { };
#  could be a function-definition or an enum-specifier.
#  The function-definition is not syntactically valid so resolving the false conflict in favour of the
#  elaborated_type_specifier is correct.
# def p_elaborated_type_specifier(p): '''elaborated_type_specifier : class_key scoped_id %prec SHIFT_THERE | elaborated_enum_specifier | TYPENAME scoped_id ''' pass def p_elaborated_enum_specifier(p): '''elaborated_enum_specifier : ENUM scoped_id %prec SHIFT_THERE ''' pass def p_enum_specifier(p): '''enum_specifier : ENUM scoped_id enumerator_clause | ENUM enumerator_clause ''' pass def p_enumerator_clause(p): '''enumerator_clause : LBRACE enumerator_list_ecarb | LBRACE enumerator_list enumerator_list_ecarb | LBRACE enumerator_list ',' enumerator_definition_ecarb ''' pass def p_enumerator_list_ecarb(p): '''enumerator_list_ecarb : RBRACE ''' pass def p_enumerator_definition_ecarb(p): '''enumerator_definition_ecarb : RBRACE ''' pass def p_enumerator_definition_filler(p): '''enumerator_definition_filler : empty ''' pass def p_enumerator_list_head(p): '''enumerator_list_head : enumerator_definition_filler | enumerator_list ',' enumerator_definition_filler ''' pass def p_enumerator_list(p): '''enumerator_list : enumerator_list_head enumerator_definition ''' pass def p_enumerator_definition(p): '''enumerator_definition : enumerator | enumerator '=' constant_expression ''' pass def p_enumerator(p): '''enumerator : identifier ''' pass def p_namespace_definition(p): '''namespace_definition : NAMESPACE scoped_id push_scope compound_declaration | NAMESPACE push_scope compound_declaration ''' global _parse_info scope = _parse_info.pop_scope() def p_namespace_alias_definition(p): '''namespace_alias_definition : NAMESPACE scoped_id '=' scoped_id ';' ''' pass def p_push_scope(p): '''push_scope : empty''' global _parse_info if p[-2] == "namespace": scope=p[-1] else: scope="" _parse_info.push_scope(scope,"namespace") def p_using_declaration(p): '''using_declaration : USING declarator_id ';' | USING TYPENAME declarator_id ';' ''' pass def p_using_directive(p): '''using_directive : USING NAMESPACE scoped_id ';' ''' pass # '''asm_definition : ASM '(' StringLiteral ')' ';' def 
p_asm_definition(p): '''asm_definition : ASM '(' nonparen_seq_opt ')' ';' ''' pass def p_linkage_specification(p): '''linkage_specification : EXTERN CLiteral declaration | EXTERN CLiteral compound_declaration | EXTERN CppLiteral declaration | EXTERN CppLiteral compound_declaration ''' pass #--------------------------------------------------------------------------------------------------- # A.7 Declarators #--------------------------------------------------------------------------------------------------- # # init-declarator is named init_declaration to reflect the embedded decl-specifier-seq_opt # def p_init_declarations(p): '''init_declarations : assignment_expression ',' init_declaration | init_declarations ',' init_declaration ''' p[0]=get_rest(p) def p_init_declaration(p): '''init_declaration : assignment_expression ''' p[0]=get_rest(p) def p_star_ptr_operator(p): '''star_ptr_operator : '*' | star_ptr_operator cv_qualifier ''' pass def p_nested_ptr_operator(p): '''nested_ptr_operator : star_ptr_operator | id_scope nested_ptr_operator ''' pass def p_ptr_operator(p): '''ptr_operator : '&' | nested_ptr_operator | global_scope nested_ptr_operator ''' pass def p_ptr_operator_seq(p): '''ptr_operator_seq : ptr_operator | ptr_operator ptr_operator_seq ''' pass # # Independently coded to localise the shift-reduce conflict: sharing just needs another %prec # def p_ptr_operator_seq_opt(p): '''ptr_operator_seq_opt : empty %prec SHIFT_THERE | ptr_operator ptr_operator_seq_opt ''' pass def p_cv_qualifier_seq_opt(p): '''cv_qualifier_seq_opt : empty | cv_qualifier_seq_opt cv_qualifier ''' pass # TODO: verify that we should include attributes here def p_cv_qualifier(p): '''cv_qualifier : CONST | VOLATILE | attributes ''' pass def p_type_id(p): '''type_id : type_specifier abstract_declarator_opt | type_specifier type_id ''' pass def p_abstract_declarator_opt(p): '''abstract_declarator_opt : empty | ptr_operator abstract_declarator_opt | direct_abstract_declarator ''' pass def 
p_direct_abstract_declarator_opt(p): '''direct_abstract_declarator_opt : empty | direct_abstract_declarator ''' pass def p_direct_abstract_declarator(p): '''direct_abstract_declarator : direct_abstract_declarator_opt parenthesis_clause | direct_abstract_declarator_opt LBRACKET RBRACKET | direct_abstract_declarator_opt LBRACKET bexpression RBRACKET ''' pass def p_parenthesis_clause(p): '''parenthesis_clause : parameters_clause cv_qualifier_seq_opt | parameters_clause cv_qualifier_seq_opt exception_specification ''' p[0] = ['(',')'] def p_parameters_clause(p): '''parameters_clause : '(' condition_opt ')' ''' p[0] = ['(',')'] # # A typed abstract qualifier such as # Class * ... # looks like a multiply, so pointers are parsed as their binary operation equivalents that # ultimately terminate with a degenerate right hand term. # def p_abstract_pointer_declaration(p): '''abstract_pointer_declaration : ptr_operator_seq | multiplicative_expression star_ptr_operator ptr_operator_seq_opt ''' pass def p_abstract_parameter_declaration(p): '''abstract_parameter_declaration : abstract_pointer_declaration | and_expression '&' | and_expression '&' abstract_pointer_declaration ''' pass def p_special_parameter_declaration(p): '''special_parameter_declaration : abstract_parameter_declaration | abstract_parameter_declaration '=' assignment_expression | ELLIPSIS ''' pass def p_parameter_declaration(p): '''parameter_declaration : assignment_expression | special_parameter_declaration | decl_specifier_prefix parameter_declaration ''' pass # # function_definition includes constructor, destructor, implicit int definitions too. A local destructor is successfully parsed as a function-declaration but the ~ was treated as a unary operator. constructor_head is the prefix ambiguity between a constructor and a member-init-list starting with a bit-field. 
# def p_function_definition(p): '''function_definition : ctor_definition | func_definition ''' pass def p_func_definition(p): '''func_definition : assignment_expression function_try_block | assignment_expression function_body | decl_specifier_prefix func_definition ''' global _parse_info if p[2] is not None and p[2][0] == '{': decl = flatten(p[1]) #print "HERE",decl if decl[-1] == ')': decl=decl[-3] else: decl=decl[-1] p[0] = decl if decl != "operator": _parse_info.add_function(decl) else: p[0] = p[2] def p_ctor_definition(p): '''ctor_definition : constructor_head function_try_block | constructor_head function_body | decl_specifier_prefix ctor_definition ''' if p[2] is None or p[2][0] == "try" or p[2][0] == '{': p[0]=p[1] else: p[0]=p[1] def p_constructor_head(p): '''constructor_head : bit_field_init_declaration | constructor_head ',' assignment_expression ''' p[0]=p[1] def p_function_try_block(p): '''function_try_block : TRY function_block handler_seq ''' global noExceptionLogic noExceptionLogic=False p[0] = ['try'] def p_function_block(p): '''function_block : ctor_initializer_opt function_body ''' pass def p_function_body(p): '''function_body : LBRACE nonbrace_seq_opt RBRACE ''' p[0] = ['{','}'] def p_initializer_clause(p): '''initializer_clause : assignment_expression | braced_initializer ''' pass def p_braced_initializer(p): '''braced_initializer : LBRACE initializer_list RBRACE | LBRACE initializer_list ',' RBRACE | LBRACE RBRACE ''' pass def p_initializer_list(p): '''initializer_list : initializer_clause | initializer_list ',' initializer_clause ''' pass #--------------------------------------------------------------------------------------------------- # A.8 Classes #--------------------------------------------------------------------------------------------------- # # An anonymous bit-field declaration may look very like inheritance: # const int B = 3; # class A : B ; # The two usages are too distant to try to create and enforce a common prefix so we have 
to resort to # a parser hack by backtracking. Inheritance is much the most likely so we mark the input stream context # and try to parse a base-clause. If we successfully reach a { the base-clause is ok and inheritance was # the correct choice so we unmark and continue. If we fail to find the { an error token causes # back-tracking to the alternative parse in elaborated_type_specifier which regenerates the : and # declares unconditional success. # def p_class_specifier_head(p): '''class_specifier_head : class_key scoped_id ':' base_specifier_list LBRACE | class_key ':' base_specifier_list LBRACE | class_key scoped_id LBRACE | class_key LBRACE ''' global _parse_info base_classes=[] if len(p) == 6: scope = p[2] base_classes = p[4] elif len(p) == 4: scope = p[2] elif len(p) == 5: base_classes = p[3] else: scope = "" _parse_info.push_scope(scope,p[1],base_classes) def p_class_key(p): '''class_key : CLASS | STRUCT | UNION ''' p[0] = p[1] def p_class_specifier(p): '''class_specifier : class_specifier_head member_specification_opt RBRACE ''' scope = _parse_info.pop_scope() def p_member_specification_opt(p): '''member_specification_opt : empty | member_specification_opt member_declaration ''' pass def p_member_declaration(p): '''member_declaration : accessibility_specifier | simple_member_declaration | function_definition | using_declaration | template_declaration ''' p[0] = get_rest(p) #print "Decl",get_rest(p) # # The generality of constructor names (there need be no parenthesised argument list) means that that # name : f(g), h(i) # could be the start of a constructor or the start of an anonymous bit-field. An ambiguity is avoided by # parsing the ctor-initializer of a function_definition as a bit-field. 
# def p_simple_member_declaration(p): '''simple_member_declaration : ';' | assignment_expression ';' | constructor_head ';' | member_init_declarations ';' | decl_specifier_prefix simple_member_declaration ''' global _parse_info decl = flatten(get_rest(p)) if len(decl) >= 4 and decl[-3] == "(": _parse_info.add_function(decl[-4]) def p_member_init_declarations(p): '''member_init_declarations : assignment_expression ',' member_init_declaration | constructor_head ',' bit_field_init_declaration | member_init_declarations ',' member_init_declaration ''' pass def p_member_init_declaration(p): '''member_init_declaration : assignment_expression | bit_field_init_declaration ''' pass def p_accessibility_specifier(p): '''accessibility_specifier : access_specifier ':' ''' pass def p_bit_field_declaration(p): '''bit_field_declaration : assignment_expression ':' bit_field_width | ':' bit_field_width ''' if len(p) == 4: p[0]=p[1] def p_bit_field_width(p): '''bit_field_width : logical_or_expression | logical_or_expression '?' 
bit_field_width ':' bit_field_width ''' pass def p_bit_field_init_declaration(p): '''bit_field_init_declaration : bit_field_declaration | bit_field_declaration '=' initializer_clause ''' pass #--------------------------------------------------------------------------------------------------- # A.9 Derived classes #--------------------------------------------------------------------------------------------------- def p_base_specifier_list(p): '''base_specifier_list : base_specifier | base_specifier_list ',' base_specifier ''' if len(p) == 2: p[0] = [p[1]] else: p[0] = p[1]+[p[3]] def p_base_specifier(p): '''base_specifier : scoped_id | access_specifier base_specifier | VIRTUAL base_specifier ''' if len(p) == 2: p[0] = p[1] else: p[0] = p[2] def p_access_specifier(p): '''access_specifier : PRIVATE | PROTECTED | PUBLIC ''' pass #--------------------------------------------------------------------------------------------------- # A.10 Special member functions #--------------------------------------------------------------------------------------------------- def p_conversion_function_id(p): '''conversion_function_id : OPERATOR conversion_type_id ''' p[0] = ['operator'] def p_conversion_type_id(p): '''conversion_type_id : type_specifier ptr_operator_seq_opt | type_specifier conversion_type_id ''' pass # # Ctor-initialisers can look like a bit field declaration, given the generalisation of names: # Class(Type) : m1(1), m2(2) { } # NonClass(bit_field) : int(2), second_variable, ... # The grammar below is used within a function_try_block or function_definition. # See simple_member_declaration for use in normal member function_definition. 
# def p_ctor_initializer_opt(p): '''ctor_initializer_opt : empty | ctor_initializer ''' pass def p_ctor_initializer(p): '''ctor_initializer : ':' mem_initializer_list ''' pass def p_mem_initializer_list(p): '''mem_initializer_list : mem_initializer | mem_initializer_list_head mem_initializer ''' pass def p_mem_initializer_list_head(p): '''mem_initializer_list_head : mem_initializer_list ',' ''' pass def p_mem_initializer(p): '''mem_initializer : mem_initializer_id '(' expression_list_opt ')' ''' pass def p_mem_initializer_id(p): '''mem_initializer_id : scoped_id ''' pass #--------------------------------------------------------------------------------------------------- # A.11 Overloading #--------------------------------------------------------------------------------------------------- def p_operator_function_id(p): '''operator_function_id : OPERATOR operator | OPERATOR '(' ')' | OPERATOR LBRACKET RBRACKET | OPERATOR '<' | OPERATOR '>' | OPERATOR operator '<' nonlgt_seq_opt '>' ''' p[0] = ["operator"] # # It is not clear from the ANSI standard whether spaces are permitted in delete[]. If not then it can # be recognised and returned as DELETE_ARRAY by the lexer. Assuming spaces are permitted there is an # ambiguity created by the over generalised nature of expressions. operator new is a valid delarator-id # which we may have an undimensioned array of. Semantic rubbish, but syntactically valid. Since the # array form is covered by the declarator consideration we can exclude the operator here. The need # for a semantic rescue can be eliminated at the expense of a couple of shift-reduce conflicts by # removing the comments on the next four lines. # def p_operator(p): '''operator : NEW | DELETE | '+' | '-' | '*' | '/' | '%' | '^' | '&' | '|' | '~' | '!' 
| '=' | ASS_ADD | ASS_SUB | ASS_MUL | ASS_DIV | ASS_MOD | ASS_XOR | ASS_AND | ASS_OR | SHL | SHR | ASS_SHR | ASS_SHL | EQ | NE | LE | GE | LOG_AND | LOG_OR | INC | DEC | ',' | ARROW_STAR | ARROW ''' p[0]=p[1] # | IF # | SWITCH # | WHILE # | FOR # | DO def p_reserved(p): '''reserved : PRIVATE | CLiteral | CppLiteral | IF | SWITCH | WHILE | FOR | DO | PROTECTED | PUBLIC | BOOL | CHAR | DOUBLE | FLOAT | INT | LONG | SHORT | SIGNED | UNSIGNED | VOID | WCHAR_T | CLASS | ENUM | NAMESPACE | STRUCT | TYPENAME | UNION | CONST | VOLATILE | AUTO | EXPLICIT | EXPORT | EXTERN | FRIEND | INLINE | MUTABLE | REGISTER | STATIC | TEMPLATE | TYPEDEF | USING | VIRTUAL | ASM | BREAK | CASE | CATCH | CONST_CAST | CONTINUE | DEFAULT | DYNAMIC_CAST | ELSE | FALSE | GOTO | OPERATOR | REINTERPRET_CAST | RETURN | SIZEOF | STATIC_CAST | THIS | THROW | TRUE | TRY | TYPEID | ATTRIBUTE | CDECL | TYPEOF | uTYPEOF ''' if p[1] in ('try', 'catch', 'throw'): global noExceptionLogic noExceptionLogic=False #--------------------------------------------------------------------------------------------------- # A.12 Templates #--------------------------------------------------------------------------------------------------- def p_template_declaration(p): '''template_declaration : template_parameter_clause declaration | EXPORT template_declaration ''' pass def p_template_parameter_clause(p): '''template_parameter_clause : TEMPLATE '<' nonlgt_seq_opt '>' ''' pass # # Generalised naming makes identifier a valid declaration, so TEMPLATE identifier is too. # The TEMPLATE prefix is therefore folded into all names, parenthesis_clause and decl_specifier_prefix. 
# # explicit_instantiation: TEMPLATE declaration # def p_explicit_specialization(p): '''explicit_specialization : TEMPLATE '<' '>' declaration ''' pass #--------------------------------------------------------------------------------------------------- # A.13 Exception Handling #--------------------------------------------------------------------------------------------------- def p_handler_seq(p): '''handler_seq : handler | handler handler_seq ''' pass def p_handler(p): '''handler : CATCH '(' exception_declaration ')' compound_statement ''' global noExceptionLogic noExceptionLogic=False def p_exception_declaration(p): '''exception_declaration : parameter_declaration ''' pass def p_throw_expression(p): '''throw_expression : THROW | THROW assignment_expression ''' global noExceptionLogic noExceptionLogic=False def p_exception_specification(p): '''exception_specification : THROW '(' ')' | THROW '(' type_id_list ')' ''' global noExceptionLogic noExceptionLogic=False def p_type_id_list(p): '''type_id_list : type_id | type_id_list ',' type_id ''' pass #--------------------------------------------------------------------------------------------------- # Misc productions #--------------------------------------------------------------------------------------------------- def p_nonsemicolon_seq(p): '''nonsemicolon_seq : empty | nonsemicolon_seq nonsemicolon ''' pass def p_nonsemicolon(p): '''nonsemicolon : misc | '(' | ')' | '<' | '>' | LBRACKET nonbracket_seq_opt RBRACKET | LBRACE nonbrace_seq_opt RBRACE ''' pass def p_nonparen_seq_opt(p): '''nonparen_seq_opt : empty | nonparen_seq_opt nonparen ''' pass def p_nonparen_seq(p): '''nonparen_seq : nonparen | nonparen_seq nonparen ''' pass def p_nonparen(p): '''nonparen : misc | '<' | '>' | ';' | LBRACKET nonbracket_seq_opt RBRACKET | LBRACE nonbrace_seq_opt RBRACE ''' pass def p_nonbracket_seq_opt(p): '''nonbracket_seq_opt : empty | nonbracket_seq_opt nonbracket ''' pass def p_nonbracket_seq(p): '''nonbracket_seq : nonbracket 
| nonbracket_seq nonbracket ''' pass def p_nonbracket(p): '''nonbracket : misc | '<' | '>' | '(' | ')' | ';' | LBRACKET nonbracket_seq_opt RBRACKET | LBRACE nonbrace_seq_opt RBRACE ''' pass def p_nonbrace_seq_opt(p): '''nonbrace_seq_opt : empty | nonbrace_seq_opt nonbrace ''' pass def p_nonbrace(p): '''nonbrace : misc | '<' | '>' | '(' | ')' | ';' | LBRACKET nonbracket_seq_opt RBRACKET | LBRACE nonbrace_seq_opt RBRACE ''' pass def p_nonlgt_seq_opt(p): '''nonlgt_seq_opt : empty | nonlgt_seq_opt nonlgt ''' pass def p_nonlgt(p): '''nonlgt : misc | '(' | ')' | LBRACKET nonbracket_seq_opt RBRACKET | '<' nonlgt_seq_opt '>' | ';' ''' pass def p_misc(p): '''misc : operator | identifier | IntegerLiteral | CharacterLiteral | FloatingLiteral | StringLiteral | reserved | '?' | ':' | '.' | SCOPE | ELLIPSIS | EXTENSION ''' pass def p_empty(p): '''empty : ''' pass # # Compute column. # input is the input text string # token is a token instance # def _find_column(input,token): ''' TODO ''' i = token.lexpos while i > 0: if input[i] == '\n': break i -= 1 column = (token.lexpos - i)+1 return column def p_error(p): if p is None: tmp = "Syntax error at end of file." else: tmp = "Syntax error at token " if p.type is "": tmp = tmp + "''" else: tmp = tmp + str(p.type) tmp = tmp + " with value '"+str(p.value)+"'" tmp = tmp + " in line " + str(lexer.lineno-1) tmp = tmp + " at column "+str(_find_column(_parsedata,p)) raise IOError( tmp ) # # The function that performs the parsing # def parse_cpp(data=None, filename=None, debug=0, optimize=0, verbose=False, func_filter=None): if debug > 0: print("Debugging parse_cpp!") # # Always remove the parser.out file, which is generated to create debugging # if os.path.exists("parser.out"): os.remove("parser.out") # # Remove the parsetab.py* files. These apparently need to be removed # to ensure the creation of a parser.out file. 
# if os.path.exists("parsetab.py"): os.remove("parsetab.py") if os.path.exists("parsetab.pyc"): os.remove("parsetab.pyc") global debugging debugging=True # # Build lexer # global lexer lexer = lex.lex() # # Initialize parse object # global _parse_info _parse_info = CppInfo(filter=func_filter) _parse_info.verbose=verbose # # Build yaccer # write_table = not os.path.exists("parsetab.py") yacc.yacc(debug=debug, optimize=optimize, write_tables=write_table) # # Parse the file # global _parsedata if not data is None: _parsedata=data ply_init(_parsedata) yacc.parse(data,debug=debug) elif not filename is None: f = open(filename) data = f.read() f.close() _parsedata=data ply_init(_parsedata) yacc.parse(data, debug=debug) else: return None # if not noExceptionLogic: _parse_info.noExceptionLogic = False else: for key in identifier_lineno: if 'ASSERT_THROWS' in key: _parse_info.noExceptionLogic = False break _parse_info.noExceptionLogic = True # return _parse_info import sys if __name__ == '__main__': # # This MAIN routine parses a sequence of files provided at the command # line. If '-v' is included, then a verbose parsing output is # generated. # for arg in sys.argv[1:]: if arg == "-v": continue print("Parsing file '"+arg+"'") if '-v' in sys.argv: parse_cpp(filename=arg,debug=2,verbose=2) else: parse_cpp(filename=arg,verbose=2) # # Print the _parse_info object summary for this file. # This illustrates how class inheritance can be used to # deduce class members. # print(str(_parse_info)) ================================================ FILE: cxxtest/python/python3/cxxtest/cxxtest_fog.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. 
# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
# the U.S. Government retains certain rights in this software.
#-------------------------------------------------------------------------
#
# TODO: add line number info
# TODO: add test function names
#

import sys
import re
#from os.path import abspath, dirname
#sys.path.insert(0, dirname(dirname(abspath(__file__))))
#sys.path.insert(0, dirname(dirname(abspath(__file__)))+"/cxx_parse")
from .cxxtest_misc import abort
from . import cxx_parser
# NOTE: the original imported `re` a second time here; the duplicate
# import was redundant and has been removed.

def cstr( str ):
    '''Convert a string to its C representation'''
    # Same escaping as cxxtest_parser.cstr: double every backslash and
    # quote.  str.replace() is equivalent to the previous
    # re.sub('\\\\', '\\\\\\\\', ...) and avoids the regex machinery.
    return '"' + str.replace('\\', '\\\\') + '"'

def scanInputFiles(files, _options):
    '''Scan all input files for test suites'''
    suites = []
    for file in files:
        try:
            print("Parsing file " + file, end=' ')
            sys.stdout.flush()
            parse_info = cxx_parser.parse_cpp(filename=file, optimize=1)
        except IOError as err:
            print(" error.")
            print(str(err))
            continue
        print("done.")
        sys.stdout.flush()
        #
        # WEH: see if it really makes sense to use parse information to
        # initialize this data.  I don't think so...
        #
        _options.haveStandardLibrary = 1
        if not parse_info.noExceptionLogic:
            _options.haveExceptionHandling = 1
        #
        keys = list(parse_info.index.keys())
        tpat = re.compile("[Tt][Ee][Ss][Tt]")
        for key in keys:
            if parse_info.index[key].scope_t == "class" and \
               parse_info.is_baseclass(key, "CxxTest::TestSuite"):
                name = parse_info.index[key].name
                suite = { 'name'      : name,
                          'file'      : file,
                          'cfile'     : cstr(file),
                          'line'      : str(parse_info.index[key].lineno),
                          'generated' : 0,
                          'object'    : 'suite_%s' % name,
                          'dobject'   : 'suiteDescription_%s' % name,
                          'tlist'     : 'Tests_%s' % name,
                          'tests'     : [],
                          'lines'     : [] }
                for fn in parse_info.get_functions(key, quiet=True):
                    tname = fn[0]
                    lineno = str(fn[1])
                    if tname.startswith('createSuite'):
                        # Indicate that we're using a dynamically generated test suite
                        suite['create'] = str(lineno)  # (unknown line)
                    if tname.startswith('destroySuite'):
                        # Indicate that we're using a dynamically generated test suite
                        suite['destroy'] = str(lineno)  # (unknown line)
                    if not tpat.match(tname):
                        # Skip non-test methods
                        continue
                    test = { 'name'   : tname,
                             'suite'  : suite,
                             'class'  : 'TestDescription_suite_%s_%s' % (suite['name'], tname),
                             'object' : 'testDescription_suite_%s_%s' % (suite['name'], tname),
                             'line'   : lineno,
                             }
                    suite['tests'].append(test)
                suites.append(suite)
    if not _options.root:
        ntests = 0
        for suite in suites:
            ntests += len(suite['tests'])
        if ntests == 0:
            abort('No tests defined')
    #
    return [_options, suites]

# ================================================
# FILE: cxxtest/python/python3/cxxtest/cxxtest_misc.py
# ================================================
#!/usr/bin/python
#-------------------------------------------------------------------------
# CxxTest: A lightweight C++ unit testing library.
# Copyright (c) 2008 Sandia Corporation.
# This software is distributed under the LGPL License v2.1
# For more information, see the COPYING file in the top CxxTest directory.
# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
# the U.S.
Government retains certain rights in this software. #------------------------------------------------------------------------- import sys def abort( problem ): '''Print error message and exit''' sys.stderr.write( '\n' ) sys.stderr.write( problem ) sys.stderr.write( '\n\n' ) sys.exit(2) ================================================ FILE: cxxtest/python/python3/cxxtest/cxxtest_parser.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- import codecs import re #import sys #import getopt #import glob from cxxtest.cxxtest_misc import abort # Global variables suites = [] suite = None inBlock = 0 options=None def scanInputFiles(files, _options): '''Scan all input files for test suites''' global options options=_options for file in files: scanInputFile(file) global suites if len(suites) is 0 and not options.root: abort( 'No tests defined' ) return [options,suites] lineCont_re = re.compile('(.*)\\\s*$') def scanInputFile(fileName): '''Scan single input file for test suites''' # mode 'rb' is problematic in python3 - byte arrays don't behave the same as # strings. # As far as the choice of the default encoding: utf-8 chews through # everything that the previous ascii codec could, plus most of new code. # TODO: figure out how to do this properly - like autodetect encoding from # file header. 
file = codecs.open(fileName, mode='r', encoding='utf-8') prev = "" lineNo = 0 contNo = 0 while 1: line = file.readline() if not line: break lineNo += 1 m = lineCont_re.match(line) if m: prev += m.group(1) + " " contNo += 1 else: scanInputLine( fileName, lineNo - contNo, prev + line ) contNo = 0 prev = "" if contNo: scanInputLine( fileName, lineNo - contNo, prev + line ) closeSuite() file.close() def scanInputLine( fileName, lineNo, line ): '''Scan single input line for interesting stuff''' scanLineForExceptionHandling( line ) scanLineForStandardLibrary( line ) scanLineForSuiteStart( fileName, lineNo, line ) global suite if suite: scanLineInsideSuite( suite, lineNo, line ) def scanLineInsideSuite( suite, lineNo, line ): '''Analyze line which is part of a suite''' global inBlock if lineBelongsToSuite( suite, lineNo, line ): scanLineForTest( suite, lineNo, line ) scanLineForCreate( suite, lineNo, line ) scanLineForDestroy( suite, lineNo, line ) def lineBelongsToSuite( suite, lineNo, line ): '''Returns whether current line is part of the current suite. 
This can be false when we are in a generated suite outside of CXXTEST_CODE() blocks If the suite is generated, adds the line to the list of lines''' if not suite['generated']: return 1 global inBlock if not inBlock: inBlock = lineStartsBlock( line ) if inBlock: inBlock = addLineToBlock( suite, lineNo, line ) return inBlock std_re = re.compile( r"\b(std\s*::|CXXTEST_STD|using\s+namespace\s+std\b|^\s*\#\s*include\s+<[a-z0-9]+>)" ) def scanLineForStandardLibrary( line ): '''Check if current line uses standard library''' global options if not options.haveStandardLibrary and std_re.search(line): if not options.noStandardLibrary: options.haveStandardLibrary = 1 exception_re = re.compile( r"\b(throw|try|catch|TSM?_ASSERT_THROWS[A-Z_]*)\b" ) def scanLineForExceptionHandling( line ): '''Check if current line uses exception handling''' global options if not options.haveExceptionHandling and exception_re.search(line): if not options.noExceptionHandling: options.haveExceptionHandling = 1 classdef = '(?:::\s*)?(?:\w+\s*::\s*)*\w+' baseclassdef = '(?:public|private|protected)\s+%s' % (classdef,) general_suite = r"\bclass\s+(%s)\s*:(?:\s*%s\s*,)*\s*public\s+" \ % (classdef, baseclassdef,) testsuite = '(?:(?:::)?\s*CxxTest\s*::\s*)?TestSuite' suites_re = { re.compile( general_suite + testsuite ) : None } generatedSuite_re = re.compile( r'\bCXXTEST_SUITE\s*\(\s*(\w*)\s*\)' ) def scanLineForSuiteStart( fileName, lineNo, line ): '''Check if current line starts a new test suite''' for i in list(suites_re.items()): m = i[0].search( line ) if m: suite = startSuite( m.group(1), fileName, lineNo, 0 ) if i[1] is not None: for test in i[1]['tests']: addTest(suite, test['name'], test['line']) break m = generatedSuite_re.search( line ) if m: sys.stdout.write( "%s:%s: Warning: Inline test suites are deprecated.\n" % (fileName, lineNo) ) startSuite( m.group(1), fileName, lineNo, 1 ) def startSuite( name, file, line, generated ): '''Start scanning a new suite''' global suite closeSuite() 
object_name = name.replace(':',"_") suite = { 'name' : name, 'file' : file, 'cfile' : cstr(file), 'line' : line, 'generated' : generated, 'object' : 'suite_%s' % object_name, 'dobject' : 'suiteDescription_%s' % object_name, 'tlist' : 'Tests_%s' % object_name, 'tests' : [], 'lines' : [] } suites_re[re.compile( general_suite + name )] = suite return suite def lineStartsBlock( line ): '''Check if current line starts a new CXXTEST_CODE() block''' return re.search( r'\bCXXTEST_CODE\s*\(', line ) is not None test_re = re.compile( r'^([^/]|/[^/])*\bvoid\s+([Tt]est\w+)\s*\(\s*(void)?\s*\)' ) def scanLineForTest( suite, lineNo, line ): '''Check if current line starts a test''' m = test_re.search( line ) if m: addTest( suite, m.group(2), lineNo ) def addTest( suite, name, line ): '''Add a test function to the current suite''' test = { 'name' : name, 'suite' : suite, 'class' : 'TestDescription_%s_%s' % (suite['object'], name), 'object' : 'testDescription_%s_%s' % (suite['object'], name), 'line' : line, } suite['tests'].append( test ) def addLineToBlock( suite, lineNo, line ): '''Append the line to the current CXXTEST_CODE() block''' line = fixBlockLine( suite, lineNo, line ) line = re.sub( r'^.*\{\{', '', line ) e = re.search( r'\}\}', line ) if e: line = line[:e.start()] suite['lines'].append( line ) return e is None def fixBlockLine( suite, lineNo, line): '''Change all [E]TS_ macros used in a line to _[E]TS_ macros with the correct file/line''' return re.sub( r'\b(E?TSM?_(ASSERT[A-Z_]*|FAIL))\s*\(', r'_\1(%s,%s,' % (suite['cfile'], lineNo), line, 0 ) create_re = re.compile( r'\bstatic\s+\w+\s*\*\s*createSuite\s*\(\s*(void)?\s*\)' ) def scanLineForCreate( suite, lineNo, line ): '''Check if current line defines a createSuite() function''' if create_re.search( line ): addSuiteCreateDestroy( suite, 'create', lineNo ) destroy_re = re.compile( r'\bstatic\s+void\s+destroySuite\s*\(\s*\w+\s*\*\s*\w*\s*\)' ) def scanLineForDestroy( suite, lineNo, line ): '''Check if current line 
defines a destroySuite() function''' if destroy_re.search( line ): addSuiteCreateDestroy( suite, 'destroy', lineNo ) def cstr( s ): '''Convert a string to its C representation''' return '"' + s.replace( '\\', '\\\\' ) + '"' def addSuiteCreateDestroy( suite, which, line ): '''Add createSuite()/destroySuite() to current suite''' if which in suite: abort( '%s:%s: %sSuite() already declared' % ( suite['file'], str(line), which ) ) suite[which] = line def closeSuite(): '''Close current suite and add it to the list if valid''' global suite if suite is not None: if len(suite['tests']) is not 0: verifySuite(suite) rememberSuite(suite) suite = None def verifySuite(suite): '''Verify current suite is legal''' if 'create' in suite and 'destroy' not in suite: abort( '%s:%s: Suite %s has createSuite() but no destroySuite()' % (suite['file'], suite['create'], suite['name']) ) elif 'destroy' in suite and 'create' not in suite: abort( '%s:%s: Suite %s has destroySuite() but no createSuite()' % (suite['file'], suite['destroy'], suite['name']) ) def rememberSuite(suite): '''Add current suite to list''' global suites suites.append( suite ) ================================================ FILE: cxxtest/python/python3/cxxtest/cxxtestgen.py ================================================ #------------------------------------------------------------------------- # CxxTest: A lightweight C++ unit testing library. # Copyright (c) 2008 Sandia Corporation. # This software is distributed under the LGPL License v2.1 # For more information, see the COPYING file in the top CxxTest directory. # Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, # the U.S. Government retains certain rights in this software. #------------------------------------------------------------------------- # vim: fileencoding=utf-8 # the above import important for forward-compatibility with python3, # which is already the default in archlinux! __all__ = ['main'] from . 
import __release__
import os
import sys
import re
import glob
from optparse import OptionParser
from . import cxxtest_parser
try:
    from . import cxxtest_fog
    imported_fog=True
except ImportError:
    imported_fog=False

from .cxxtest_misc import abort

# Module-level state shared between the parse and output phases.
options = []
suites = []

wrotePreamble = 0
wroteWorld = 0
lastIncluded = ''


def main(args=sys.argv):
    '''The main program'''
    #
    # Reset global state
    #
    global wrotePreamble
    wrotePreamble=0
    global wroteWorld
    wroteWorld=0
    global lastIncluded
    lastIncluded = ''

    global suites
    global options
    files = parseCommandline(args)
    if imported_fog and options.fog:
        [options,suites] = cxxtest_fog.scanInputFiles( files, options )
    else:
        [options,suites] = cxxtest_parser.scanInputFiles( files, options )
    writeOutput()

def parseCommandline(args):
    '''Analyze command line arguments'''
    global imported_fog
    global options
    parser = OptionParser("%prog [options] [<filename> ...]")
    parser.add_option("--version",
                      action="store_true", dest="version", default=False,
                      help="Write the CxxTest version.")
    parser.add_option("-o", "--output",
                      dest="outputFileName", default=None, metavar="NAME",
                      help="Write output to file NAME.")
    parser.add_option("-w","--world", dest="world", default="cxxtest",
                      help="The label of the tests, used to name the XML results.")
    parser.add_option("", "--include", action="append",
                      dest="headers", default=[], metavar="HEADER",
                      help="Include file HEADER in the test runner before other headers.")
    parser.add_option("", "--abort-on-fail",
                      action="store_true", dest="abortOnFail", default=False,
                      help="Abort tests on failed asserts (like xUnit).")
    parser.add_option("", "--main",
                      action="store", dest="main", default="main",
                      help="Specify an alternative name for the main() function.")
    parser.add_option("", "--headers",
                      action="store", dest="header_filename", default=None,
                      help="Specify a filename that contains a list of header files that are processed to generate a test runner.")
    parser.add_option("", "--runner",
                      dest="runner", default="", metavar="CLASS",
                      help="Create a test runner that processes test events using the class CxxTest::CLASS.")
    parser.add_option("", "--gui",
                      dest="gui", metavar="CLASS",
                      help="Create a GUI test runner that processes test events using the class CxxTest::CLASS. (deprecated)")
    parser.add_option("", "--error-printer",
                      action="store_true", dest="error_printer", default=False,
                      help="Create a test runner using the ErrorPrinter class, and allow the use of the standard library.")
    parser.add_option("", "--xunit-printer",
                      action="store_true", dest="xunit_printer", default=False,
                      help="Create a test runner using the XUnitPrinter class.")
    parser.add_option("", "--xunit-file", dest="xunit_file", default="",
                      help="The file to which the XML summary is written for test runners using the XUnitPrinter class. The default XML filename is TEST-<world>.xml, where <world> is the value of the --world option. (default: cxxtest)")
    parser.add_option("", "--have-std",
                      action="store_true", dest="haveStandardLibrary", default=False,
                      help="Use the standard library (even if not found in tests).")
    parser.add_option("", "--no-std",
                      action="store_true", dest="noStandardLibrary", default=False,
                      help="Do not use standard library (even if found in tests).")
    parser.add_option("", "--have-eh",
                      action="store_true", dest="haveExceptionHandling", default=False,
                      help="Use exception handling (even if not found in tests).")
    parser.add_option("", "--no-eh",
                      action="store_true", dest="noExceptionHandling", default=False,
                      help="Do not use exception handling (even if found in tests).")
    parser.add_option("", "--longlong",
                      dest="longlong", default=None, metavar="TYPE",
                      help="Use TYPE as for long long integers. (default: not supported)")
    parser.add_option("", "--no-static-init",
                      action="store_true", dest="noStaticInit", default=False,
                      help="Do not rely on static initialization in the test runner.")
    parser.add_option("", "--template",
                      dest="templateFileName", default=None, metavar="TEMPLATE",
                      help="Generate the test runner using file TEMPLATE to define a template.")
    parser.add_option("", "--root",
                      action="store_true", dest="root", default=False,
                      help="Write the main() function and global data for a test runner.")
    parser.add_option("", "--part",
                      action="store_true", dest="part", default=False,
                      help="Write the tester classes for a test runner.")
    #parser.add_option("", "--factor",
    #action="store_true", dest="factor", default=False,
    #help="Declare the _CXXTEST_FACTOR macro. (deprecated)")
    if imported_fog:
        fog_help = "Use new FOG C++ parser"
    else:
        fog_help = "Use new FOG C++ parser (disabled)"
    parser.add_option("-f", "--fog-parser",
                      action="store_true", dest="fog", default=False,
                      help=fog_help )

    (options, args) = parser.parse_args(args=args)

    if not options.header_filename is None:
        if not os.path.exists(options.header_filename):
            abort( "ERROR: the file '%s' does not exist!" % options.header_filename )
        INPUT = open(options.header_filename)
        headers = [line.strip() for line in INPUT]
        args.extend( headers )
        INPUT.close()

    if options.fog and not imported_fog:
        abort( "Cannot use the FOG parser. Check that the 'ply' package is installed. The 'ordereddict' package is also required if running Python 2.6")

    if options.version:
        printVersion()

    # the cxxtest builder relies on this behaviour! don't remove
    if options.runner == 'none':
        options.runner = None

    if options.xunit_printer or options.runner == "XUnitPrinter":
        options.xunit_printer=True
        options.runner="XUnitPrinter"
        # NOTE(review): the original code had an "if len(args) > 1: ... elif ..."
        # here whose two branches were byte-identical; collapsed into one
        # (behavior unchanged).
        if options.xunit_file == "":
            if options.world == "":
                options.world = "cxxtest"
            options.xunit_file="TEST-"+options.world+".xml"

    if options.error_printer:
        options.runner= "ErrorPrinter"
        options.haveStandardLibrary = True

    if options.noStaticInit and (options.root or options.part):
        abort( '--no-static-init cannot be used with --root/--part' )

    if options.gui and not options.runner:
        options.runner = 'StdioPrinter'

    files = setFiles(args[1:])
    if len(files) == 0 and not options.root:
        # BUG FIX: OptionParser.error() prints the usage message to stderr
        # and exits; it returns None, so the original
        # sys.stderr.write(parser.error(...)) could never have worked.
        parser.error("No input files found")

    return files

def printVersion():
    '''Print CxxTest version and exit'''
    sys.stdout.write( "This is CxxTest version %s.\n" % __release__.__version__ )
    sys.exit(0)

def setFiles(patterns ):
    '''Set input files specified on command line'''
    files = expandWildcards( patterns )
    return files

def expandWildcards( patterns ):
    '''Expand all wildcards in an array (glob)'''
    fileNames = []
    for pathName in patterns:
        patternFiles = glob.glob( pathName )
        for fileName in patternFiles:
            fileNames.append( fixBackslashes( fileName ) )
    return fileNames

def fixBackslashes( fileName ):
    '''Convert backslashes to slashes in file name'''
    return re.sub( r'\\', '/', fileName, 0 )

def writeOutput():
    '''Create output file'''
    if options.templateFileName:
        writeTemplateOutput()
    else:
        writeSimpleOutput()

def writeSimpleOutput():
    '''Create output not based on template'''
    output = startOutputFile()
    writePreamble( output )
    if options.root or not options.part:
        writeMain( output )

    if len(suites) > 0:
        output.write("bool "+suites[0]['object']+"_init = false;\n")

    writeWorld( output )
    output.close()

include_re = re.compile( r"\s*\#\s*include\s+<cxxtest/" )
preamble_re = re.compile( r"^\s*<CxxTest\s+preamble>\s*$" ) world_re = re.compile( r"^\s*<CxxTest\s+world>\s*$" ) def writeTemplateOutput(): '''Create output based on template file''' template = open(options.templateFileName) output = startOutputFile() while 1: line = template.readline() if not line: break; if include_re.search( line ): writePreamble( output ) output.write( line ) elif preamble_re.search( line ): writePreamble( output ) elif world_re.search( line ): if len(suites) > 0: output.write("bool "+suites[0]['object']+"_init = false;\n") writeWorld( output ) else: output.write( line ) template.close() output.close() def startOutputFile(): '''Create output file and write header''' if options.outputFileName is not None: output = open( options.outputFileName, 'w' ) else: output = sys.stdout output.write( "/* Generated file, do not edit */\n\n" ) return output def writePreamble( output ): '''Write the CxxTest header (#includes and #defines)''' global wrotePreamble if wrotePreamble: return output.write( "#ifndef CXXTEST_RUNNING\n" ) output.write( "#define CXXTEST_RUNNING\n" ) output.write( "#endif\n" ) output.write( "\n" ) if options.xunit_printer: output.write( "#include <fstream>\n" ) if options.haveStandardLibrary: output.write( "#define _CXXTEST_HAVE_STD\n" ) if options.haveExceptionHandling: output.write( "#define _CXXTEST_HAVE_EH\n" ) if options.abortOnFail: output.write( "#define _CXXTEST_ABORT_TEST_ON_FAIL\n" ) if options.longlong: output.write( "#define _CXXTEST_LONGLONG %s\n" % options.longlong ) #if options.factor: #output.write( "#define _CXXTEST_FACTOR\n" ) for header in options.headers: output.write( "#include \"%s\"\n" % header ) output.write( "#include <cxxtest/TestListener.h>\n" ) output.write( "#include <cxxtest/TestTracker.h>\n" ) output.write( "#include <cxxtest/TestRunner.h>\n" ) output.write( "#include <cxxtest/RealDescriptions.h>\n" ) output.write( "#include <cxxtest/TestMain.h>\n" ) if options.runner: output.write( "#include 
<cxxtest/%s.h>\n" % options.runner ) if options.gui: output.write( "#include <cxxtest/%s.h>\n" % options.gui ) output.write( "\n" ) wrotePreamble = 1 def writeMain( output ): '''Write the main() function for the test runner''' if not (options.gui or options.runner): return output.write( 'int %s( int argc, char *argv[] ) {\n' % options.main ) output.write( ' int status;\n' ) if options.noStaticInit: output.write( ' CxxTest::initialize();\n' ) if options.gui: tester_t = "CxxTest::GuiTuiRunner<CxxTest::%s, CxxTest::%s> " % (options.gui, options.runner) else: tester_t = "CxxTest::%s" % (options.runner) if options.xunit_printer: output.write( ' std::ofstream ofstr("%s");\n' % options.xunit_file ) output.write( ' %s tmp(ofstr);\n' % tester_t ) output.write( ' CxxTest::RealWorldDescription::_worldName = "%s";\n' % options.world ) else: output.write( ' %s tmp;\n' % tester_t ) output.write( ' status = CxxTest::Main<%s>( tmp, argc, argv );\n' % tester_t ) output.write( ' return status;\n') output.write( '}\n' ) def writeWorld( output ): '''Write the world definitions''' global wroteWorld if wroteWorld: return writePreamble( output ) writeSuites( output ) if options.root or not options.part: writeRoot( output ) writeWorldDescr( output ) if options.noStaticInit: writeInitialize( output ) wroteWorld = 1 def writeSuites(output): '''Write all TestDescriptions and SuiteDescriptions''' for suite in suites: writeInclude( output, suite['file'] ) if isGenerated(suite): generateSuite( output, suite ) if isDynamic(suite): writeSuitePointer( output, suite ) else: writeSuiteObject( output, suite ) writeTestList( output, suite ) writeSuiteDescription( output, suite ) writeTestDescriptions( output, suite ) def isGenerated(suite): '''Checks whether a suite class should be created''' return suite['generated'] def isDynamic(suite): '''Checks whether a suite is dynamic''' return 'create' in suite def writeInclude(output, file): '''Add #include "file" statement''' global lastIncluded if file == 
lastIncluded: return output.writelines( [ '#include "', file, '"\n\n' ] ) lastIncluded = file def generateSuite( output, suite ): '''Write a suite declared with CXXTEST_SUITE()''' output.write( 'class %s : public CxxTest::TestSuite {\n' % suite['name'] ) output.write( 'public:\n' ) for line in suite['lines']: output.write(line) output.write( '};\n\n' ) def writeSuitePointer( output, suite ): '''Create static suite pointer object for dynamic suites''' if options.noStaticInit: output.write( 'static %s *%s;\n\n' % (suite['name'], suite['object']) ) else: output.write( 'static %s *%s = 0;\n\n' % (suite['name'], suite['object']) ) def writeSuiteObject( output, suite ): '''Create static suite object for non-dynamic suites''' output.writelines( [ "static ", suite['name'], " ", suite['object'], ";\n\n" ] ) def writeTestList( output, suite ): '''Write the head of the test linked list for a suite''' if options.noStaticInit: output.write( 'static CxxTest::List %s;\n' % suite['tlist'] ) else: output.write( 'static CxxTest::List %s = { 0, 0 };\n' % suite['tlist'] ) def writeWorldDescr( output ): '''Write the static name of the world name''' if options.noStaticInit: output.write( 'const char* CxxTest::RealWorldDescription::_worldName;\n' ) else: output.write( 'const char* CxxTest::RealWorldDescription::_worldName = "cxxtest";\n' ) def writeTestDescriptions( output, suite ): '''Write all test descriptions for a suite''' for test in suite['tests']: writeTestDescription( output, suite, test ) def writeTestDescription( output, suite, test ): '''Write test description object''' output.write( 'static class %s : public CxxTest::RealTestDescription {\n' % test['class'] ) output.write( 'public:\n' ) if not options.noStaticInit: output.write( ' %s() : CxxTest::RealTestDescription( %s, %s, %s, "%s" ) {}\n' % (test['class'], suite['tlist'], suite['dobject'], test['line'], test['name']) ) output.write( ' void runTest() { %s }\n' % runBody( suite, test ) ) output.write( '} %s;\n\n' % 
test['object'] ) def runBody( suite, test ): '''Body of TestDescription::run()''' if isDynamic(suite): return dynamicRun( suite, test ) else: return staticRun( suite, test ) def dynamicRun( suite, test ): '''Body of TestDescription::run() for test in a dynamic suite''' return 'if ( ' + suite['object'] + ' ) ' + suite['object'] + '->' + test['name'] + '();' def staticRun( suite, test ): '''Body of TestDescription::run() for test in a non-dynamic suite''' return suite['object'] + '.' + test['name'] + '();' def writeSuiteDescription( output, suite ): '''Write SuiteDescription object''' if isDynamic( suite ): writeDynamicDescription( output, suite ) else: writeStaticDescription( output, suite ) def writeDynamicDescription( output, suite ): '''Write SuiteDescription for a dynamic suite''' output.write( 'CxxTest::DynamicSuiteDescription<%s> %s' % (suite['name'], suite['dobject']) ) if not options.noStaticInit: output.write( '( %s, %s, "%s", %s, %s, %s, %s )' % (suite['cfile'], suite['line'], suite['name'], suite['tlist'], suite['object'], suite['create'], suite['destroy']) ) output.write( ';\n\n' ) def writeStaticDescription( output, suite ): '''Write SuiteDescription for a static suite''' output.write( 'CxxTest::StaticSuiteDescription %s' % suite['dobject'] ) if not options.noStaticInit: output.write( '( %s, %s, "%s", %s, %s )' % (suite['cfile'], suite['line'], suite['name'], suite['object'], suite['tlist']) ) output.write( ';\n\n' ) def writeRoot(output): '''Write static members of CxxTest classes''' output.write( '#include <cxxtest/Root.cpp>\n' ) def writeInitialize(output): '''Write CxxTest::initialize(), which replaces static initialization''' output.write( 'namespace CxxTest {\n' ) output.write( ' void initialize()\n' ) output.write( ' {\n' ) for suite in suites: output.write( ' %s.initialize();\n' % suite['tlist'] ) if isDynamic(suite): output.write( ' %s = 0;\n' % suite['object'] ) output.write( ' %s.initialize( %s, %s, "%s", %s, %s, %s, %s );\n' % 
(suite['dobject'], suite['cfile'], suite['line'], suite['name'], suite['tlist'], suite['object'], suite['create'], suite['destroy']) ) else: output.write( ' %s.initialize( %s, %s, "%s", %s, %s );\n' % (suite['dobject'], suite['cfile'], suite['line'], suite['name'], suite['object'], suite['tlist']) ) for test in suite['tests']: output.write( ' %s.initialize( %s, %s, %s, "%s" );\n' % (test['object'], suite['tlist'], suite['dobject'], test['line'], test['name']) ) output.write( ' }\n' ) output.write( '}\n' ) ================================================ FILE: cxxtest/python/python3/scripts/cxxtestgen ================================================ #! python import cxxtest.cxxtestgen cxxtest.cxxtestgen.main() ================================================ FILE: cxxtest/python/scripts/cxxtestgen ================================================ #! python import cxxtest.cxxtestgen cxxtest.cxxtestgen.main() ================================================ FILE: cxxtest/python/setup.py ================================================ """ Script to generate the installer for cxxtest. 
""" classifiers = """\ Development Status :: 4 - Beta Intended Audience :: End Users/Desktop License :: OSI Approved :: LGPL License Natural Language :: English Operating System :: Microsoft :: Windows Operating System :: Unix Programming Language :: Python Topic :: Software Development :: Libraries :: Python Modules """ import os import sys from os.path import realpath, dirname if sys.version_info >= (3,0): sys.path.insert(0, dirname(realpath(__file__))+os.sep+'python3') os.chdir('python3') import cxxtest try: from setuptools import setup except ImportError: from distutils.core import setup doclines = cxxtest.__doc__.split("\n") setup(name="cxxtest", version=cxxtest.__version__, maintainer=cxxtest.__maintainer__, maintainer_email=cxxtest.__maintainer_email__, url = cxxtest.__url__, license = cxxtest.__license__, platforms = ["any"], description = doclines[0], classifiers = filter(None, classifiers.split("\n")), long_description = "\n".join(doclines[2:]), packages=['cxxtest'], keywords=['utility'], scripts=['scripts/cxxtestgen'] # # The entry_points option is not supported by distutils.core # #entry_points=""" #[console_scripts] #cxxtestgen = cxxtest.cxxtestgen:main #""" ) ================================================ FILE: demoapps/CMakeLists.txt ================================================ project(GraphLab) # link_libraries(${Boost_LIBRARIES}) # link_libraries(${GraphLab_LIBRARIES}) macro(add_all_subdirectories retval curdir) file(GLOB sub-dir RELATIVE ${curdir} *) set(list_of_dirs "") foreach(dir ${sub-dir}) if(IS_DIRECTORY ${curdir}/${dir}) STRING(SUBSTRING ${dir} 0 1 firstchar) if(${firstchar} STREQUAL "." OR ${firstchar} STREQUAL "_" ) else(${firstchar} STREQUAL "." 
OR ${firstchar} STREQUAL "_") set(list_of_dirs ${list_of_dirs} ${dir}) message(STATUS "Detected demo app: " ${dir}) add_subdirectory(${dir}) endif() endif() endforeach() set(${retval} ${list_of_dirs}) endmacro() add_all_subdirectories(retval, ${CMAKE_CURRENT_SOURCE_DIR}) ================================================ FILE: demoapps/dsl/CMakeLists.txt ================================================ project(GraphLab) # add_graphlab_executable(gl_server gl_server.cpp) # add_library(gen_impl SHARED gen_impl.cpp) ================================================ FILE: demoapps/dsl/gen_impl.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <cmath> #include <iostream> #include "graph_typedefs.gen" using namespace graphlab; #define ALPHA 0.87 extern "C" void user_program(user_funs* f) { float prev = f->get_vertex_data(); float neighbors = f->reduce_neighbors(IN_EDGES); //neighbors = neighbors/out_edges float curr = ALPHA*neighbors + (1-ALPHA); f->set_vertex_data(curr); float last_change = std::abs(curr - prev); std::cout << "last change was: " << last_change << std::endl; if (last_change > 0.01) { f->signal_neighbors(OUT_EDGES); } std::cout.flush(); } extern "C" void vertex_reduce(vertex_data_type& a, const vertex_data_type& b) { a += b; } ================================================ FILE: demoapps/dsl/gl_server.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <vector> #include <string> #include <fstream> #include <graphlab.hpp> #include <graphlab/engine/gl3engine.hpp> #include <dlfcn.h> using namespace graphlab; #include "graph_typedefs.gen" #define GET_EDGE_DATA 100 #define GET_VERTEX_DATA 101 typedef distributed_graph<vertex_data_type, edge_data_type> graph_type; typedef gl3engine<graph_type> engine_type; //here we keep one global copy of the user defined function pointers void (*user_program)(user_funs*) = NULL; void (*vertex_reduce)(vertex_data_type&,const vertex_data_type&) = NULL; void (*edge_reduce)(edge_data_type&,const edge_data_type&) = NULL; //here are definitions of functions passed to dynamically linked user code graph_type::vertex_data_type get_vertex_data(graph_type::vertex_type* v) { return v->data(); } void set_vertex_data(graph_type::vertex_data_type& d, graph_type::vertex_type* v) { v->data() = d; } // graph_type::edge_data_type edge_get_map(graph_type::edge_type& e) { // return e.data(); // } graph_type::vertex_data_type vertex_get_map(const graph_type::vertex_type& v) { return v.data(); } // void edge_get_reduce(edge_data_type& ev, const edge_data_type ed) { // edge_reduce(ev,ed); // } void vertex_get_reduce(vertex_data_type& ev, const vertex_data_type ed) { vertex_reduce(ev,ed); } //wonder why this isn't working. hm... 
// void edge_get_reduce(std::vector<graph_type::edge_data_type>& ev, const graph_type::edge_data_type ed) { // ev.push_back(ed); // } // void vertex_get_reduce(std::vector<graph_type::vertex_data_type>& vv, const graph_type::edge_data_type vd) { // vv.push_back(vd); // } // std::vector<edge_data_type> get_neighboring_edges() { // return current_context->map_reduce<std::vector<edge_data_type> >(GET_EDGE_DATA, ALL_EDGES); // } // std::vector<vertex_data_type> get_neighboring_vertices() { // return current_context->map_reduce<std::vector<vertex_data_type> >(GET_VERTEX_DATA, ALL_EDGES); // } vertex_data_type reduce_neighbors(edge_dir_type d, engine_type::context_type* ctx) { return ctx->map_reduce<vertex_data_type>(GET_VERTEX_DATA, d); } void signal_neighbors(edge_dir_type d, engine_type::context_type* ctx) { ctx->broadcast_signal(d); } // edge_data_type reduce_edges() { // return current_context->map_reduce<edge_data_type>(GET_EDGE_DATA, ALL_EDGES); // } void server_program(engine_type::context_type& context, graph_type::vertex_type& vertex, const engine_type::message_type& unused) { //capture functions for user to call user_funs f; f.vtx = &vertex; f.ctx = &context; f._get_vertex_data = (vertex_data_type (*)(void*))get_vertex_data; f._set_vertex_data = (void (*)(vertex_data_type&,void*))set_vertex_data; f._reduce_neighbors = (vertex_data_type (*)(edge_dir_type,void*))reduce_neighbors; f._signal_neighbors = (void (*)(edge_dir_type,void*))signal_neighbors; //f.reduce_edges = reduce_edges; //call dynamically linked user function user_program(&f); // float prev = vertex.data(); // // map reduce over neighbors // vertex.data() = 0.15 + 0.85 * // context.map_reduce<float>(PAGERANK_MAP_REDUCE, IN_EDGES); // float last_change = std::fabs((vertex.data()- prev) / vertex.num_out_edges()); // if (last_change > TOLERANCE) { // // signals out neighbors if I change substantially // context.broadcast_signal(OUT_EDGES); // } } void init_vertex(graph_type::vertex_type& vertex) { 
vertex.data() = 1; } int main(int argc, char** argv) { // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("PageRank algorithm."); std::string graph_dir; std::string format = "adj"; std::string exec_type = "synchronous"; clopts.attach_option("graph", graph_dir, "The graph file. Required "); clopts.add_positional("graph"); clopts.attach_option("format", format, "The graph file format"); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); //clopts.attach_option("tol", TOLERANCE, // "The permissible change at convergence."); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant pagerank to a " "sequence of files with prefix saveprefix"); //set single threaded //clopts.set_ncpus(1); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } if (graph_dir == "") { dc.cout() << "Graph not specified. 
Cannot continue"; return EXIT_FAILURE; } // Dynamically link the functions dc.cout() << "Starting dynamic linking" << std::endl; void* handle = dlopen("/home/emullen/graphlab/graphlab2.2/graphlabapi/debug/demoapps/dsl/libgen_impl.so", RTLD_LAZY); if (handle == NULL) { dc.cout() << dlerror() << std::endl; assert(handle != NULL); } // pagerank_map = (float (*)(const graphlab::distributed_graph<float, graphlab::empty>::vertex_type&))dlsym(pagerank_handle, "pagerank_map"); // assert(pagerank_map != NULL); // pagerank_combine = (void (*)(float&, const float&))dlsym(pagerank_handle, "pagerank_combine"); // assert(pagerank_combine != NULL); user_program = (void (*)(user_funs*))dlsym(handle, "user_program"); assert(user_program != NULL); vertex_reduce = (void (*)(vertex_data_type&,const vertex_data_type&))dlsym(handle, "vertex_reduce"); assert(vertex_reduce != NULL); // edge_reduce = (void (*)(edge_data_type&,const edge_data_type&))dlsym(handle, "edge_reduce"); // assert(edge_reduce != NULL); dc.cout() << "Finished dynamic linking" << std::endl; // Build the graph ---------------------------------------------------------- graph_type graph(dc, clopts); dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); // must call finalize before querying the graph graph.finalize(); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; // Initialize the vertex data graph.transform_vertices(init_vertex); // Running The Engine ------------------------------------------------------- engine_type engine(dc, graph, clopts); // register the map reduce operation before usage // Each task registration must have a distinct ID ranging fro 0 to 223 //engine.register_map_reduce(PAGERANK_MAP_REDUCE, // pagerank_map, // pagerank_combine); // engine.register_map_reduce(GET_EDGE_DATA, // edge_get_map, // edge_get_reduce); engine.register_map_reduce(GET_VERTEX_DATA, vertex_get_map, vertex_get_reduce); 
engine.set_vertex_program(server_program); engine.signal_all(); engine.wait(); // Save the final graph ----------------------------------------------------- // if (saveprefix != "") { // graph.save(saveprefix, pagerank_writer(), // false, // do not gzip // true, // save vertices // false); // do not save edges // } dlclose(handle); // Tear-down communication layer and quit ----------------------------------- graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main ================================================ FILE: demoapps/dsl/graph_typedefs.gen ================================================ #include <vector> #include <graphlab/graph/graph_basic_types.hpp> #include <ostream> typedef float vertex_data_type; typedef float edge_data_type; typedef struct { void* ctx; void* vtx; vertex_data_type get_vertex_data() { return _get_vertex_data(vtx); } void set_vertex_data(vertex_data_type vd) { _set_vertex_data(vd,vtx); } vertex_data_type reduce_neighbors(graphlab::edge_dir_type e) { return _reduce_neighbors(e,ctx); } void signal_neighbors(graphlab::edge_dir_type e) { _signal_neighbors(e,ctx); } vertex_data_type (*_get_vertex_data)(void*); void (*_set_vertex_data)(vertex_data_type&,void*); //std::vector<edge_data_type> (*get_neighboring_edges)(); //std::vector<vertex_data_type> (*get_neighboring_vertices)(); vertex_data_type (*_reduce_neighbors)(graphlab::edge_dir_type,void*); void (*_signal_neighbors)(graphlab::edge_dir_type,void*); //edge_data_type (*reduce_edges)(); } user_funs; ================================================ FILE: demoapps/dsl/impl.graphlab ================================================ types { vertextype float edgetype float } def update() { //get a local copy of the vertex data float prev = get_vertex_data() //get a sum of the neighbors' values //uses vertex_reduce defined below float neighbors = reduce_neighbors(IN_EDGES) //calculate updated value float curr = 0.85*neighbors + 0.15 //modify our data set_vertex_data(curr) 
//calculate change float last_change = curr - prev if (last_change < 0) { last_change = -1 * last_change } //signal neighbors if (last_change > 0.01) { signal_neighbors(OUT_EDGES) } } def vertex_reduce(a:vertextype,b:vertextype) { a += b } ================================================ FILE: demoapps/dsl/scala_impl/dsl.scala ================================================ import scala.util.parsing.combinator.syntactical._ object GLParser extends StandardTokenParsers { val keywords = List("update","reduce_edges","reduce_vertices","print") val types = List("bool","int","float") lexical.reserved ++= keywords lexical.reserved ++= types lexical.delimiters ++= List("{","}","(",")",".",",","+","-","*","/","<",">","=") def numLit:Parser[Int] = opt("-") ~ numericLit ^^ { case None ~ n => n.toInt case Some(_) ~ n => (-1)*n.toInt } } ================================================ FILE: demoapps/pagerank/CMakeLists.txt ================================================ project(GraphLab) add_graphlab_executable(simple_pagerank simple_pagerank.cpp) add_graphlab_executable(warp_parfor_pagerank warp_parfor_pagerank.cpp) add_graphlab_executable(warp_engine_pagerank warp_engine_pagerank.cpp) ================================================ FILE: demoapps/pagerank/simple_pagerank.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <vector> #include <string> #include <fstream> #include <graphlab.hpp> // #include <graphlab/macros_def.hpp> // Global random reset probability float RESET_PROB = 0.15; float TOLERANCE = 1.0E-2; // The vertex data is just the pagerank value (a float) typedef float vertex_data_type; // There is no edge data in the pagerank application typedef graphlab::empty edge_data_type; // The graph type is determined by the vertex and edge data types typedef graphlab::distributed_graph<vertex_data_type, edge_data_type> graph_type; /* * A simple function used by graph.transform_vertices(init_vertex); * to initialize the vertes data. */ void init_vertex(graph_type::vertex_type& vertex) { vertex.data() = 1; } /* * The factorized page rank update function extends ivertex_program * specifying the: * * 1) graph_type * 2) gather_type: float (returned by the gather function). Note * that the gather type is not strictly needed here since it is * assumed to be the same as the vertex_data_type unless * otherwise specified * * In addition ivertex program also takes a message type which is * assumed to be empty. Since we do not need messages no message type * is provided. * * pagerank also extends graphlab::IS_POD_TYPE (is plain old data type) * which tells graphlab that the pagerank program can be serialized * (converted to a byte stream) by directly reading its in memory * representation. If a vertex program does not exted * graphlab::IS_POD_TYPE it must implement load and save functions. 
*/ class pagerank : public graphlab::ivertex_program<graph_type, float>, public graphlab::IS_POD_TYPE { float last_change; public: /* Gather the weighted rank of the adjacent page */ float gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return ((1.0 - RESET_PROB) / edge.source().num_out_edges()) * edge.source().data(); } /* Use the total rank of adjacent pages to update this page */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { const double newval = total + RESET_PROB; last_change = std::fabs(newval - vertex.data()); vertex.data() = newval; } /* The scatter edges depend on whether the pagerank has converged */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { if (last_change > TOLERANCE) return graphlab::OUT_EDGES; else return graphlab::NO_EDGES; } /* The scatter function just signal adjacent pages */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { context.signal(edge.target()); } }; // end of factorized_pagerank update functor /* * We want to save the final graph so we define a write which will be * used in graph.save("path/prefix", pagerank_writer()) to save the graph. */ struct pagerank_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "\t" << v.data() << "\n"; return strm.str(); } std::string save_edge(graph_type::edge_type e) { return ""; } }; // end of pagerank writer int main(int argc, char** argv) { // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("PageRank algorithm."); std::string graph_dir; std::string format = "adj"; std::string exec_type = "synchronous"; clopts.attach_option("graph", graph_dir, "The graph file. 
Required "); clopts.add_positional("graph"); clopts.attach_option("format", format, "The graph file format"); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); clopts.attach_option("tol", TOLERANCE, "The permissible change at convergence."); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant pagerank to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } if (graph_dir == "") { dc.cout() << "Graph not specified. Cannot continue"; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc, clopts); dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); // must call finalize before querying the graph graph.finalize(); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; // Initialize the vertex data graph.transform_vertices(init_vertex); // Running The Engine ------------------------------------------------------- graphlab::omni_engine<pagerank> engine(dc, graph, exec_type, clopts); engine.signal_all(); engine.start(); const float runtime = engine.elapsed_seconds(); dc.cout() << "Finished Running engine in " << runtime << " seconds." << std::endl; // Save the final graph ----------------------------------------------------- if (saveprefix != "") { graph.save(saveprefix, pagerank_writer(), false, // do not gzip true, // save vertices false); // do not save edges } // Tear-down communication layer and quit ----------------------------------- graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main ================================================ FILE: demoapps/pagerank/warp_engine_pagerank.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. 
* All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <vector> #include <string> #include <fstream> #include <graphlab.hpp> #include <graphlab/engine/warp_engine.hpp> using namespace graphlab; // The graph type is determined by the vertex and edge data types typedef distributed_graph<float , graphlab::empty> graph_type; typedef warp::warp_engine<graph_type> warp_engine_type; /* * A simple function used by graph.transform_vertices(init_vertex); * to initialize the vertes data. */ void init_vertex(graph_type::vertex_type& vertex) { vertex.data() = 1; } float pagerank_map(graph_type::edge_type edge, graph_type::vertex_type other) { return other.data() / other.num_out_edges(); } void signal_neighbor(warp_engine_type::context& context, graph_type::edge_type edge, graph_type::vertex_type other) { context.signal(other); } void pagerank(warp_engine_type::context& context, graph_type::vertex_type vertex) { float old_vdata = vertex.data(); vertex.data() = 0.15 + 0.85 * warp::map_reduce_neighborhood(vertex, IN_EDGES, pagerank_map); if (std::fabs(old_vdata - vertex.data()) > 1E-2) { warp::broadcast_neighborhood(context, vertex, OUT_EDGES, signal_neighbor); } } /* * We want to save the final graph so we define a write which will be * used in graph.save("path/prefix", pagerank_writer()) to save the graph. 
*/ struct pagerank_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "\t" << v.data() << "\n"; return strm.str(); } std::string save_edge(graph_type::edge_type e) { return ""; } }; // end of pagerank writer int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); // Initialize control plain using mpi mpi_tools::init(argc, argv); distributed_control dc; // Parse command line options ----------------------------------------------- command_line_options clopts("PageRank algorithm."); std::string graph_dir; std::string format = "tsv"; clopts.attach_option("graph", graph_dir, "The graph file. "); clopts.attach_option("format", format, "The graph file format"); size_t iterations = 10; clopts.attach_option("iterations", iterations, "Number of asynchronous iterations to run"); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "Prefix to save the output pagerank in"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc, clopts); dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); // must call finalize before querying the graph graph.finalize(); // Initialize the vertex data graph.transform_vertices(init_vertex); timer ti; warp_engine_type engine(dc, graph, clopts); engine.signal_all(); engine.set_update_function(pagerank); engine.start(); dc.cout() << "Finished Running in " << ti.current_time() << " seconds." 
<< std::endl; // Save the final graph ----------------------------------------------------- if (saveprefix != "") { graph.save(saveprefix, pagerank_writer(), false, // do not gzip true, // save vertices false); // do not save edges } mpi_tools::finalize(); } // End of main ================================================ FILE: demoapps/pagerank/warp_parfor_pagerank.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <vector> #include <string> #include <fstream> #include <graphlab.hpp> #include <graphlab/engine/warp_graph_mapreduce.hpp> #include <graphlab/engine/warp_parfor_all_vertices.hpp> #include <graphlab/engine/warp_graph_transform.hpp> using namespace graphlab; // The graph type is determined by the vertex and edge data types typedef distributed_graph<float , float> graph_type; /* * A simple function used by graph.transform_vertices(init_vertex); * to initialize the vertes data. 
*/ void init_vertex(graph_type::vertex_type& vertex) { vertex.data() = 1; } float pagerank_map(graph_type::edge_type edge, graph_type::vertex_type other) { return other.data() / other.num_out_edges(); } void pagerank(graph_type::vertex_type vertex) { vertex.data() = 0.15 + 0.85 * warp::map_reduce_neighborhood(vertex, IN_EDGES, pagerank_map); } /* * We want to save the final graph so we define a write which will be * used in graph.save("path/prefix", pagerank_writer()) to save the graph. */ struct pagerank_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "\t" << v.data() << "\n"; return strm.str(); } std::string save_edge(graph_type::edge_type e) { return ""; } }; // end of pagerank writer int main(int argc, char** argv) { // Initialize control plain using mpi mpi_tools::init(argc, argv); distributed_control dc; // Parse command line options ----------------------------------------------- command_line_options clopts("PageRank algorithm."); std::string graph_dir; std::string format = "tsv"; clopts.attach_option("graph", graph_dir, "The graph file. "); clopts.attach_option("format", format, "The graph file format"); size_t iterations = 10; clopts.attach_option("iterations", iterations, "Number of asynchronous iterations to run"); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "Prefix to save the output pagerank in"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." 
<< std::endl; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc, clopts); dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); // must call finalize before querying the graph graph.finalize(); // Initialize the vertex data graph.transform_vertices(init_vertex); timer ti; for (size_t i = 0;i < iterations; ++i) { warp::parfor_all_vertices(graph, pagerank); std::cout << "Iteration " << i << " complete\n"; } dc.cout() << "Finished Running in " << ti.current_time() << " seconds." << std::endl; // Save the final graph ----------------------------------------------------- if (saveprefix != "") { graph.save(saveprefix, pagerank_writer(), false, // do not gzip true, // save vertices false); // do not save edges } mpi_tools::finalize(); } // End of main ================================================ FILE: demoapps/rpc/CMakeLists.txt ================================================ project(GraphLab) add_graphlab_executable(rpc_example1 rpc_example1.cpp) add_graphlab_executable(rpc_example2 rpc_example2.cpp) add_graphlab_executable(rpc_example3 rpc_example3.cpp) add_graphlab_executable(rpc_example4 rpc_example4.cpp) add_graphlab_executable(rpc_example5 rpc_example5.cpp) add_graphlab_executable(rpc_example6 rpc_example6.cpp) add_graphlab_executable(rpc_example7 rpc_example7.cpp) add_graphlab_executable(rpc_example8 rpc_example8.cpp) add_graphlab_executable(rpc_example9 rpc_example9.cpp) #add_graphlab_executable(barrier_test barrier_test.cpp) add_graphlab_executable(dht_performance_test dht_performance_test.cpp) add_graphlab_executable(rpc_call_perf_test rpc_call_perf_test.cpp) add_graphlab_executable(fiber_future_test fiber_future_test.cpp) add_graphlab_executable(obj_fiber_future_test obj_fiber_future_test.cpp) ================================================ FILE: demoapps/rpc/dht_performance_test.cpp ================================================ /* * 
Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <iostream> #include <graphlab/util/timer.hpp> #include <graphlab/util/mpi_tools.hpp> #include <graphlab/util/generics/any.hpp> #include <graphlab/rpc/dc.hpp> #include <graphlab/rpc/dc_init_from_mpi.hpp> #include <graphlab/rpc/dht.hpp> #include <graphlab/logger/logger.hpp> using namespace graphlab; std::string randstring(size_t len) { std::string str; str.resize(len); const char *charset="ab"; size_t charsetlen = 64; for (size_t i = 0;i < len; ++i) { str[i] = charset[rand() % charsetlen]; } return str; } int main(int argc, char ** argv) { mpi_tools::init(argc, argv); distributed_control dc; std::cout << "I am machine id " << dc.procid() << " in " << dc.numprocs() << " machines"<<std::endl; dht<std::string, std::string> testdht(dc); dc.barrier(); std::vector<std::pair<std::string, std::string> > data; const size_t NUMSTRINGS = 10000; const size_t strlen[4] = {16, 128, 1024, 10240}; // fill rate for (size_t l = 0; l < 4; ++l) { timer ti; if (dc.procid() == 0) { std::cout << "String Length = " << strlen[l] << std::endl; data.clear(); for (size_t i = 0;i < NUMSTRINGS; ++i) { data.push_back(std::make_pair(randstring(8), randstring(strlen[l]))); } std::cout << "10k random strings generated" << std::endl; std::cout << "Starting set" << std::endl; ti.start(); for (size_t i = 0;i < 
NUMSTRINGS; ++i) { testdht.set(data[i].first, data[i].second); if (i % 100000 == 0) { std::cout << "."; std::cout.flush(); } } std::cout << "10k insertions in " << ti.current_time(); } dc.full_barrier(); if (dc.procid() == 0) std::cout << "\t" << ti.current_time() << " " << double(strlen[l]*NUMSTRINGS)/ti.current_time()/1024/1024 << std::endl; // dc.barrier(); // get rate if (dc.procid() == 0) { std::cout << "Starting get" << std::endl; timer ti; ti.start(); for (size_t i = 0;i < NUMSTRINGS; ++i) { std::pair<bool, std::string> ret = testdht.get(data[i].first); assert(ret.first); if (i % 100 == 0) { std::cout << "."; std::cout.flush(); } } std::cout << "10k reads in " << ti.current_time() << std::endl; } if (dc.procid() == 0) { std::cout << "Starting background gets" << std::endl; timer ti; std::vector<request_future<std::pair<bool, std::string> > > futures; futures.resize(NUMSTRINGS); ti.start(); for (size_t i = 0;i < NUMSTRINGS; ++i) { futures[i] = testdht.get_future(data[i].first); } std::cout << "gets issued." << std::endl; for (size_t i = 0;i < NUMSTRINGS; ++i) { std::pair<bool, std::string> ret = futures[i](); if (i % 100 == 0) { std::cout << "."; std::cout.flush(); } } std::cout << "10k reads in " << ti.current_time() << std::endl; } testdht.clear(); } dc.barrier(); testdht.print_stats(); mpi_tools::finalize(); } ================================================ FILE: demoapps/rpc/fiber_future_test.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. 
See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <iostream> #include <graphlab/util/timer.hpp> #include <graphlab/util/mpi_tools.hpp> #include <graphlab/rpc/dc.hpp> #include <graphlab/parallel/atomic.hpp> #include <graphlab/parallel/fiber_group.hpp> #include <graphlab/parallel/fiber_remote_request.hpp> using namespace graphlab; atomic<size_t> complete_count; size_t some_remote_function(size_t a) { return a; } void test_fiber(size_t sequential_count) { for (size_t i = 0;i < sequential_count; ++i) { request_future<size_t> ret = fiber_remote_request(1, some_remote_function, 1); complete_count.inc(ret()); } } int main(int argc, char** argv) { mpi_tools::init(argc, argv); distributed_control dc; timer ti; // with fibers if (dc.procid() == 0) { fiber_group group(8192); for (int i = 0;i < 1600000; ++i) { group.launch(boost::bind(test_fiber, 1)); if (i % 100000 == 0) std::cout << i << "\n"; } group.join(); std::cout << "completed requests: " << complete_count.value << " in " << ti.current_time() << "\n"; } dc.barrier(); mpi_tools::finalize(); } ================================================ FILE: demoapps/rpc/obj_fiber_future_test.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <iostream> #include <graphlab/util/timer.hpp> #include <graphlab/util/mpi_tools.hpp> #include <graphlab/rpc/dc.hpp> #include <graphlab/parallel/atomic.hpp> #include <graphlab/parallel/fiber_group.hpp> #include <graphlab/parallel/fiber_remote_request.hpp> using namespace graphlab; struct testclass { dc_dist_object<testclass> rmi; atomic<size_t> complete_count; testclass(distributed_control& dc): rmi(dc, this) { } size_t some_remote_function(size_t a) { return a; } void test_fiber(size_t sequential_count) { for (size_t i = 0;i < sequential_count; ++i) { request_future<size_t> ret = object_fiber_remote_request(rmi, 1, &testclass::some_remote_function, 1); complete_count.inc(ret()); } } }; int main(int argc, char** argv) { mpi_tools::init(argc, argv); distributed_control dc; testclass tc(dc); timer ti; // with fibers if (dc.procid() == 0) { fiber_group group(4096); for (int i = 0;i < 1600000; ++i) { group.launch(boost::bind(&testclass::test_fiber, &tc, 1)); } group.join(); std::cout << "completed requests: " << tc.complete_count.value << " in " << ti.current_time() << "\n"; } dc.barrier(); mpi_tools::finalize(); } ================================================ FILE: demoapps/rpc/rpc_call_perf_test.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <graphlab/rpc/dc.hpp> #include <graphlab/rpc/dc_dist_object.hpp> #include <graphlab/rpc/dc_init_from_mpi.hpp> #include <graphlab/util/mpi_tools.hpp> #include <graphlab/util/timer.hpp> using namespace graphlab; #define SEND_LIMIT (64 * 1024 * 1024) #define SEND_LIMIT_PRINT "64MB" struct teststruct { dc_dist_object<teststruct> rmi; teststruct(distributed_control &dc):rmi(dc, this) { dc.barrier(); } /** * Receiver */ atomic<size_t> ctr; void receive_ints(size_t i0, size_t i1, size_t i2, size_t i3) { ctr.inc(); } void receive_vector(const std::vector<size_t> &s) { ctr.inc(); } void receive_string(const std::string &s) { ctr.inc(); } /** * Short Sends With Remote Call */ void perform_short_sends_0(size_t number) { for (size_t i = 0;i < number; ++i) { rmi.remote_call(1, &teststruct::receive_ints, 100,100,1000,5000000); } } void perform_long_sends_0(size_t length, size_t number) { std::vector<size_t> v(length, 5000000); for (size_t i = 0;i < number; ++i) { rmi.remote_call(1, &teststruct::receive_vector, v); } } void perform_string_sends_0(size_t length, size_t number) { std::string s(length, 1); for (size_t i = 0;i < number; ++i) { rmi.remote_call(1, &teststruct::receive_string, s); } } void print_res(double t1, double t2, double t3) { std::cout << "Calls Sent at "; std::cout << SEND_LIMIT / t1 / 1024 / 1024 << " MB/s\n"; std::cout << "Receive Completed at "; std::cout << SEND_LIMIT / t3 / 1024 / 1024 << " MB/s\n\n"; } void run_short_sends_0() { if (rmi.procid() == 1) { rmi.full_barrier(); return; } timer ti; std::cout << "Single Threaded " << SEND_LIMIT_PRINT << " sends, 4 integer blocks\n"; ti.start(); size_t numsends = SEND_LIMIT / (sizeof(size_t) * 4); perform_short_sends_0(numsends); double t1 = ti.current_time(); rmi.dc().flush(); double t2 = ti.current_time(); rmi.full_barrier(); double t3 = ti.current_time(); print_res(t1,t2,t3); } void run_threaded_short_sends_0(size_t 
numthreads) { if (rmi.procid() == 1) { rmi.full_barrier(); return; } timer ti; std::cout << numthreads << " threaded " << SEND_LIMIT_PRINT << " sends, 4 integer blocks\n"; ti.start(); fiber_group thrgrp; size_t numsends = SEND_LIMIT / (sizeof(size_t) * 4 * numthreads); for (size_t i = 0; i < numthreads; ++i) { fiber_control::affinity_type affinity; affinity.clear(); affinity.set_bit(i % fiber_control::get_instance().num_workers()); thrgrp.launch(boost::bind(&teststruct::perform_short_sends_0, this, numsends), affinity); } thrgrp.join(); double t1 = ti.current_time(); rmi.dc().flush(); double t2 = ti.current_time(); rmi.full_barrier(); double t3 = ti.current_time(); print_res(t1,t2,t3); } void run_string_sends_0(size_t length) { if (rmi.procid() == 1) { rmi.full_barrier(); return; } timer ti; size_t numsends = SEND_LIMIT / (length); std::cout << "Single Threaded " << SEND_LIMIT_PRINT <<" sends, " << length << " bytes * "<< numsends << "\n"; ti.start(); size_t rd = rdtsc(); perform_string_sends_0(length, numsends); size_t rd2 = rdtsc(); std::cout << "Completed in: " << ti.current_time() << " seconds\n"; std::cout << (rd2 - rd) / numsends << " cycles per call\n"; double t1 = ti.current_time(); rmi.dc().flush(); std::cout << "Flush in: " << ti.current_time() << " seconds\n"; double t2 = ti.current_time(); rmi.full_barrier(); std::cout << "Receive Complete in: " << ti.current_time() << " seconds\n"; double t3 = ti.current_time(); print_res(t1,t2,t3); } void run_threaded_string_sends_0(size_t length, size_t numthreads) { if (rmi.procid() == 1) { rmi.full_barrier(); return; } timer ti; std::cout << numthreads << " threaded " << SEND_LIMIT_PRINT <<" sends, " << length << " bytes\n"; ti.start(); size_t numsends = SEND_LIMIT / (length * numthreads); size_t rd = rdtsc(); fiber_group thrgrp; for (size_t i = 0; i < numthreads; ++i) { fiber_control::affinity_type affinity; affinity.clear(); affinity.set_bit(i % fiber_control::get_instance().num_workers()); 
thrgrp.launch(boost::bind(&teststruct::perform_string_sends_0, this, length, numsends), affinity); } thrgrp.join(); size_t rd2 = rdtsc(); std::cout << (rd2 - rd) / (numthreads * numsends) << " cycles per call\n"; double t1 = ti.current_time(); rmi.dc().flush(); double t2 = ti.current_time(); rmi.full_barrier(); double t3 = ti.current_time(); print_res(t1,t2,t3); } }; int main(int argc, char** argv) { // init MPI mpi_tools::init(argc, argv); distributed_control dc; if (dc.numprocs() != 2) { std::cout << "Run with exactly 2 MPI nodes.\n"; return 0; } dc.barrier(); teststruct ts(dc); /* ts.run_short_sends_0(); ts.run_threaded_short_sends_0(2); ts.run_threaded_short_sends_0(4); ts.run_threaded_short_sends_0(8); ts.run_threaded_short_sends_0(16); ts.run_short_pod_sends_0(); ts.run_threaded_short_pod_sends_0(2); ts.run_threaded_short_pod_sends_0(4); ts.run_threaded_short_pod_sends_0(8); ts.run_threaded_short_pod_sends_0(16); ts.run_long_sends_0(1024); ts.run_threaded_long_sends_0(1024, 2); ts.run_threaded_long_sends_0(1024, 4); ts.run_threaded_long_sends_0(1024, 8); ts.run_threaded_long_sends_0(1024, 16); ts.run_long_sends_0(10240); ts.run_threaded_long_sends_0(10240, 2); ts.run_threaded_long_sends_0(10240, 4); ts.run_threaded_long_sends_0(10240, 8); ts.run_threaded_long_sends_0(10240, 16); */ for (size_t i = 4; i < 24; ++i) { ts.run_string_sends_0(1<<i); } ts.run_threaded_string_sends_0(16, 1); ts.run_threaded_string_sends_0(16, 2); ts.run_threaded_string_sends_0(16, 4); ts.run_threaded_string_sends_0(16, 8); ts.run_threaded_string_sends_0(16, 16); dc.barrier(); mpi_tools::finalize(); } ================================================ FILE: demoapps/rpc/rpc_example1.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <iostream> #include <graphlab/rpc/dc.hpp> using namespace graphlab; void print(int val) { std::cout << val << std::endl; } int add_one(int val) { return val + 1; } int main(int argc, char ** argv) { // init MPI global_logger().set_log_level(LOG_INFO); mpi_tools::init(argc, argv); distributed_control dc; if (dc.numprocs() != 2) { std::cout<< "RPC Example 1: Basic Synchronous RPC\n"; std::cout << "Run with exactly 2 MPI nodes.\n"; return 0; } if (dc.procid() == 0) { dc.control_call(1, print, 10); std::cout << "5 plus 1 is : " << dc.remote_request(1, add_one, 5) << std::endl; std::cout << "11 plus 1 is : " << dc.remote_request(1, add_one, 11) << std::endl; } dc.barrier(); // terminate MPI mpi_tools::finalize(); } ================================================ FILE: demoapps/rpc/rpc_example2.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <iostream> #include <string> #include <sstream> #include <vector> #include <graphlab/rpc/dc.hpp> #include <graphlab/serialization/serialization_includes.hpp> using namespace graphlab; void print(std::string val) { std::cout << val << std::endl; } std::vector<int> add_one(std::vector<int> val) { val.push_back(1); return val; } int main(int argc, char ** argv) { // init MPI mpi_tools::init(argc, argv); distributed_control dc; if (dc.numprocs() != 2) { std::cout<< "RPC Example 2: Asynchronous RPC with Built-in Serialization\n"; std::cout << "Run with exactly 2 MPI nodes.\n"; return 0; } dc.barrier(); if (dc.procid() == 0) { dc.remote_call(1, print, "hello world!"); // Create a vector with a few elements std::vector<int> vec; vec.push_back(1); vec.push_back(2); // Call the remote machine vec = dc.remote_request(1, add_one, vec); std::stringstream strm; // Print the vector for (size_t i = 0; i < vec.size(); ++i) { strm << vec[i] << ", "; } strm << std::endl; strm.flush(); dc.remote_call(1, print, strm.str()); } dc.barrier(); mpi_tools::finalize(); } ================================================ FILE: demoapps/rpc/rpc_example3.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <iostream> #include <string> #include <map> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/rpc/dc.hpp> using namespace graphlab; struct teststruct: public IS_POD_TYPE{ int a; double b; }; void print(std::map<int, teststruct> &data1, std::string data2) { std::cout << "1.a = " << data1[1].a << std::endl; std::cout << "10.b = " << data1[10].b << std::endl; std::cout << "string = " << data2 << std::endl; } int main(int argc, char ** argv) { // init MPI mpi_tools::init(argc, argv); distributed_control dc; if (dc.numprocs() != 2) { std::cout<< "RPC Example 3: Asynchronous RPC with Struct POD Serialization\n"; std::cout << "Run with exactly 2 MPI nodes.\n"; return 0; } if (dc.procid() == 0) { std::map<int, teststruct> data; data[1].a = 10; data[2].b = 15.0; dc.remote_call(1, print, data, "hello world!"); } dc.barrier(); mpi_tools::finalize(); } ================================================ FILE: demoapps/rpc/rpc_example4.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <iostream> #include <string> #include <map> #include <graphlab/rpc/dc.hpp> #include <graphlab/serialization/serialization_includes.hpp> using namespace graphlab; struct teststruct{ int a; double b; std::vector<std::string> name; void save(oarchive &arc) const { arc << a << b << name; }; void load(iarchive &arc) { arc >> a >> b >> name; }; }; void print(std::map<int, teststruct> &data1, std::string data2) { std::cout << "1.a = " << data1[1].a << std::endl; std::cout << "2.b = " << data1[2].b << std::endl; std::cout << "string = " << data2 << std::endl; } int main(int argc, char ** argv) { mpi_tools::init(argc, argv); distributed_control dc; if (dc.numprocs() != 2) { std::cout<< "RPC Example 4: Asynchronous RPC with Manual Serialization\n"; std::cout << "Run with exactly 2 MPI nodes.\n"; return 0; } if (dc.procid() == 0) { std::map<int, teststruct> data; data[1].a = 10; data[2].b = 15.0; dc.remote_call(1, print, data, "hello world!"); } dc.barrier(); mpi_tools::finalize(); } ================================================ FILE: demoapps/rpc/rpc_example5.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <iostream> #include <cstdio> #include <graphlab/rpc/dc.hpp> using namespace graphlab; int main(int argc, char ** argv) { mpi_tools::init(argc, argv); distributed_control dc; if (dc.numprocs() != 2) { std::cout<< "RPC Example 5: Asynchronous RPC to printf \n"; std::cout << "Run with exactly 2 MPI nodes.\n"; return 0; } if (dc.procid() == 0) { dc.remote_call(1, printf, "%d + %f = %s\n", 1, 2.0, "three"); } dc.barrier(); mpi_tools::finalize(); } ================================================ FILE: demoapps/rpc/rpc_example6.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <iostream> #include <string> #include <vector> #include <graphlab/rpc/dc.hpp> #include <graphlab/util/generics/any.hpp> using namespace graphlab; void print(any val) { val.print(std::cout); std::cout << std::endl; } int main(int argc, char ** argv) { mpi_tools::init(argc, argv); distributed_control dc; if (dc.numprocs() != 2) { std::cout<< "RPC Example 6: Asynchronous RPC with any \n"; std::cout << "Run with exactly 2 MPI nodes.\n"; return 0; } if (dc.procid() == 0) { dc.remote_call(1, print, any(15)); dc.remote_call(1, print, any(10.5)); dc.remote_call(1, print, any(std::string("hello world"))); } int i = dc.procid() == 0 ? 10 : 100; dc.broadcast(i, dc.procid() == 0); std::cout << i << std::endl; assert(i == 10); mpi_tools::finalize(); } ================================================ FILE: demoapps/rpc/rpc_example7.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <iostream> #include <cstdio> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/rpc/dc.hpp> #include <graphlab/rpc/dc_dist_object.hpp> using namespace graphlab; template <typename T> class distributed_vector { private: dc_dist_object<distributed_vector<T> > rmi; // The local RMI object std::map<size_t, T> data; // storage mutex lock; // protect the storage public: distributed_vector(distributed_control &dc):rmi(dc, this) { }; ///Reads the value at key i T get(size_t i) { // find the owning machine procid_t owningmachine = i % rmi.dc().numprocs(); if (owningmachine == rmi.dc().procid()) { // if I own the data. just read and return it T ret; lock.lock(); ret = data[i]; lock.unlock(); return ret; } else { // otherwise I need to go to another machine return rmi.remote_request(owningmachine, &distributed_vector<T>::get, i); } } /// Sets the value at key i void set(size_t i, const T& val) { // find the owning machine procid_t owningmachine = i % rmi.dc().numprocs(); if (owningmachine == rmi.dc().procid()) { // if I own the data set it lock.lock(); data[i] = val; lock.unlock(); } else { // forward the write to another machine rmi.remote_request(owningmachine, &distributed_vector<T>::set, i, val); } } }; int main(int argc, char ** argv) { mpi_tools::init(argc, argv); distributed_control dc; if (dc.numprocs() != 2) { std::cout<< "RPC Example 7: Distributed Object\n"; std::cout << "Run with exactly 2 MPI nodes.\n"; return 0; } size_t i = 10; dc.all_reduce(i); std::cout << i << "\n"; // create a distributed vector distributed_vector<std::string> vec(dc); dc.barrier(); if (dc.procid() == 0) { vec.set(10, "set from 0"); vec.set(11, "set from 0"); } else { vec.set(1, "set from 1"); vec.set(2, "set from 1"); } dc.barrier(); if (dc.procid() == 0) { std::cout << vec.get(1) << "\n"; std::cout << vec.get(2) << "\n"; 
std::cout << vec.get(10) << "\n"; std::cout << vec.get(11) << std::endl; } dc.barrier(); if (dc.procid() == 1) { std::cout << vec.get(1) << "\n"; std::cout << vec.get(2) << "\n"; std::cout << vec.get(10) << "\n"; std::cout << vec.get(11) << std::endl; } dc.barrier(); mpi_tools::finalize(); } ================================================ FILE: demoapps/rpc/rpc_example8.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <vector> #include <iostream> #include <graphlab/rpc/dc.hpp> using namespace graphlab; void print(distributed_control &dc, procid_t caller, int val) { std::cout << dc.procid() << ": Receiving print with value : " << val << std::endl; } int main(int argc, char ** argv) { mpi_tools::init(argc, argv); distributed_control dc; if (dc.numprocs() != 4) { std::cout<< "RPC Example 8: Basic Broadcast Test\n"; std::cout << "Run with exactly 4 MPI nodes.\n"; return 0; } if (dc.procid() == 0) { std::cout << "First set of calls... Proc 1 and 3 should receive" << std::endl; std::vector<procid_t> s; s.push_back(1); s.push_back(3); dc.remote_call(s.begin(), s.end(), print, 1); } dc.full_barrier(); if (dc.procid() == 0) { std::cout << "Second set of calls... 
Proc 0 and 2 should receive" << std::endl; std::vector<procid_t> s; s.push_back(2); s.push_back(0); dc.remote_call(s.begin(), s.end(), print, 1); } dc.full_barrier(); // terminate MPI mpi_tools::finalize(); } ================================================ FILE: demoapps/rpc/rpc_example9.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <vector> #include <iostream> #include <graphlab/rpc/dc.hpp> #include <graphlab/serialization/iarchive.hpp> using namespace graphlab; struct test_struct { dc_dist_object<test_struct> rmi; test_struct(distributed_control &dc):rmi(dc, this) { dc.barrier(); } void test_blob(size_t len, wild_pointer w) { assert(len == sizeof(procid_t)); std::cout << "split call from : " << *reinterpret_cast<const procid_t*>(w.ptr) << "\n"; } void print(int val) { std::cout << rmi.procid() << ": Receiving print with value : " << val << std::endl; } void test() { if (rmi.procid() == 0) { std::cout << "First set of calls... Proc 1 and 3 should receive" << std::endl; std::vector<procid_t> s; s.push_back(1); s.push_back(3); rmi.remote_call(s.begin(), s.end(), &test_struct::print, 1); } rmi.full_barrier(); if (rmi.procid() == 0) { std::cout << "Second set of calls... 
Proc 0 and 2 should receive" << std::endl; std::vector<procid_t> s; s.push_back(2); s.push_back(0); rmi.remote_call(s.begin(), s.end(), &test_struct::print, 1); } rmi.full_barrier(); oarchive* oarc = rmi.split_call_begin(&test_struct::test_blob); (*oarc) << rmi.procid(); rmi.split_call_end(1, oarc); rmi.full_barrier(); } }; int main(int argc, char ** argv) { mpi_tools::init(argc, argv); distributed_control dc; if (dc.numprocs() != 4) { std::cout<< "RPC Example 9: Basic Broadcast Test\n"; std::cout << "Run with exactly 4 MPI nodes.\n"; return 0; } test_struct ts(dc); ts.test(); // terminate MPI mpi_tools::finalize(); } ================================================ FILE: dist/README ================================================ This folder contains the distribution files created by the make_dist script in the project root. ================================================ FILE: doc/README ================================================ To generate docs, run "doxygen" in the graphlab/ directory Doxygen >= 1.8 recommended ================================================ FILE: doc/images/cycle_triangle.dot ================================================ digraph g{ A [label="A",style=filled, color=green, shape=circle]; b [label="", shape=circle]; c [label="", shape=circle]; { rank=same; b; c} nodesep=0.6; A->b ; A->c [dir=back]; b->c ; } ================================================ FILE: doc/images/example_webgraph.dot ================================================ digraph webgraph { a [label="1: a.com"]; b [label="4: b.org"]; c [label="10: c.edu"]; d [label="11: d.gov"]; a->b; a->c; b->c; c->d; c->a; d->c; } ================================================ FILE: doc/images/graph_format_example.dot ================================================ digraph webgraph { a [label="1"]; b [label="2"]; c [label="5"]; d [label="7"]; a->b; a->c; c->d; d->a; d->c; } ================================================ FILE: doc/images/in_triangle.dot 
================================================ digraph g{ A [label="A",style=filled, color=green, shape=circle]; b [label="", shape=circle]; c [label="", shape=circle]; { rank=same; b; c} nodesep=0.6; A->b [dir=back]; A->c [dir=back]; b->c [dir=none,style=dashed]; } ================================================ FILE: doc/images/out_triangle.dot ================================================ digraph g{ A [label="A",style=filled, color=green, shape=circle]; b [label="", shape=circle]; c [label="", shape=circle]; { rank=same; b; c} nodesep=0.6; A->b; A->c; b->c [dir=none,style=dashed]; } ================================================ FILE: doc/images/through_triangle.dot ================================================ digraph g{ A [label="A",style=filled, color=green, shape=circle]; b [label="", shape=circle]; c [label="", shape=circle]; { rank=same; b; c} nodesep=0.6; A->b ; A->c [dir=back]; c->b ; } ================================================ FILE: license/LICENSE.txt ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. ================================================ FILE: license/LICENSE_prepend.txt ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. 
See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ ================================================ FILE: license/corporate_CLA.txt ================================================ Carnegie Mellon University Corporate Contributor License Agreement based on The Apache Software Foundation Software Grant and Corporate Contributor License Agreement ("Agreement") http://www.apache.org/licenses/ (v r190612) Thank you for your interest in GraphLab. In order to clarify the intellectual property license granted with Contributions from any person or entity, Carnegie Mellon University ("CMU") must have a Contributor License Agreement (CLA) on file that has been signed by each Contributor, indicating agreement to the license terms below. This license is for your protection as a Contributor as well as the protection of CMU and its users; it does not change your rights to use your own Contributions for any other purpose. This version of the Agreement allows an entity (the "Corporation") to submit Contributions to CMU, to authorize Contributions submitted by its designated employees to CMU, and to grant copyright and patent licenses thereto. If you have not already done so, please complete and sign, then scan and email a pdf file of this Agreement to support-graphlab@cs.cmu.edu. Alternatively, you may send it by facsimile to +1-412-268-2205. If necessary, send an original signed Agreement to: Carlos Guestrin 6105 Gates Hillman Complex Machine Learning Department Carnegie Mellon University 5000 Forbes Avenue Pittsburgh, PA 15213, U.S.A. Please read this document carefully before signing and keep a copy for your records. 
Corporation name: ________________________________________________ Corporation address: ________________________________________________ ________________________________________________ ________________________________________________ Point of Contact: ________________________________________________ E-Mail: ________________________________________________ Telephone: _____________________ Fax: _____________________ You accept and agree to the following terms and conditions for Your present and future Contributions submitted to CMU. In return, CMU shall not use Your Contributions in a way that is contrary to the public benefit or inconsistent with its nonprofit status and bylaws in effect at the time of the Contribution. Except for the license granted herein to CMU and recipients of software distributed by CMU, You reserve all right, title, and interest in and to Your Contributions. 1. Definitions. "You" (or "Your") shall mean the copyright owner or legal entity authorized by the copyright owner that is making this Agreement with CMU. For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single Contributor. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "Contribution" shall mean the code, documentation or other original works of authorship expressly identified in Schedule B, as well as any original work of authorship, including any modifications or additions to an existing work, that is intentionally submitted by You to CMU for inclusion in, or documentation of, any of the products owned or managed by CMU (the "Work"). 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to CMU or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, CMU for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution." 2. Grant of Copyright License. Subject to the terms and conditions of this Agreement, You hereby grant to CMU and to recipients of software distributed by CMU a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your Contributions and such derivative works. 3. Grant of Patent License. Subject to the terms and conditions of this Agreement, You hereby grant to CMU and to recipients of software distributed by CMU a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by combination of Your Contribution(s) with the Work to which such Contribution(s) were submitted. If any entity institutes patent litigation against You or any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that your Contribution, or the Work to which you have contributed, constitutes direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed. 4. You represent that You are legally entitled to grant the above license. 
You represent further that each employee of the Corporation designated on Schedule A below (or in a subsequent written modification to that Schedule) is authorized to submit Contributions on behalf of the Corporation. 5. You represent that each of Your Contributions is Your original creation (see section 7 for submissions on behalf of others). 6. You are not expected to provide support for Your Contributions, except to the extent You desire to provide support. You may provide support for free, for a fee, or not at all. Unless required by applicable law or agreed to in writing, You provide Your Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 7. Should You wish to submit work that is not Your original creation, You may submit it to CMU separately from any Contribution, identifying the complete details of its source and of any license or other restriction (including, but not limited to, related patents, trademarks, and license agreements) of which you are personally aware, and conspicuously marking the work as "Submitted on behalf of a third-party: [named here]". 8. It is your responsibility to notify CMU when any change is required to the list of designated employees authorized to submit Contributions on behalf of the Corporation, or to the Corporation's Point of Contact with CMU. Please sign: __________________________________ Date: _______________ Title: __________________________________ Corporation: __________________________________ Schedule A [Initial list of designated employees. NB: authorization is not tied to particular Contributions.] Schedule B [Identification of optional concurrent software grant. Would be left blank or omitted if there is no concurrent software grant.] 
================================================ FILE: license/individual_CLA.txt ================================================ Carnegie Mellon University Individual Contributor License Agreement based on The Apache Software Foundation Individual Contributor License Agreement ("Agreement") V2.0 http://www.apache.org/licenses/ Thank you for your interest in GraphLab. In order to clarify the intellectual property license granted with Contributions from any person or entity, Carnegie Mellon University ("CMU") must have a Contributor License Agreement ("CLA") on file that has been signed by each Contributor, indicating agreement to the license terms below. This license is for your protection as a Contributor as well as the protection of CMU and its users; it does not change your rights to use your own Contributions for any other purpose. If you have not already done so, please complete and sign, then scan and email a pdf file of this Agreement to support-graphlab@cs.cmu.edu. Alternatively, you may send it by facsimile to +1-412-268-2205. If necessary, send an original signed Agreement to: Carlos Guestrin 6105 Gates Hillman Complex Machine Learning Department Carnegie Mellon University 5000 Forbes Avenue Pittsburgh, PA 15213, U.S.A. Please read this document carefully before signing and keep a copy for your records. Full name: ______________________________________________________ Mailing Address: ________________________________________________ _________________________________________________________________ Country: ______________________________________________________ Telephone: ______________________________________________________ Facsimile: ______________________________________________________ E-Mail: ______________________________________________________ You accept and agree to the following terms and conditions for Your present and future Contributions submitted to CMU. 
In return, CMU shall not use Your Contributions in a way that is contrary to the public benefit or inconsistent with its nonprofit status and bylaws in effect at the time of the Contribution. Except for the license granted herein to CMU and recipients of software distributed by CMU, You reserve all right, title, and interest in and to Your Contributions. 1. Definitions. "You" (or "Your") shall mean the copyright owner or legal entity authorized by the copyright owner that is making this Agreement with CMU. For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single Contributor. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "Contribution" shall mean any original work of authorship, including any modifications or additions to an existing work, that is intentionally submitted by You to CMU for inclusion in, or documentation of, any of the products owned or managed by CMU (the "Work"). For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to CMU or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, CMU for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution." 2. Grant of Copyright License. 
Subject to the terms and conditions of this Agreement, You hereby grant to CMU and to recipients of software distributed by CMU a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your Contributions and such derivative works. 3. Grant of Patent License. Subject to the terms and conditions of this Agreement, You hereby grant to CMU and to recipients of software distributed by CMU a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by combination of Your Contribution(s) with the Work to which such Contribution(s) was submitted. If any entity institutes patent litigation against You or any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that your Contribution, or the Work to which you have contributed, constitutes direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed. 4. You represent that you are legally entitled to grant the above license. If your employer(s) has rights to intellectual property that you create that includes your Contributions, you represent that you have received permission to make Contributions on behalf of that employer, that your employer has waived such rights for your Contributions to CMU, or that your employer has executed a separate Corporate CLA with CMU. 5. You represent that each of Your Contributions is Your original creation (see section 7 for submissions on behalf of others). 
You represent that Your Contribution submissions include complete details of any third-party license or other restriction (including, but not limited to, related patents and trademarks) of which you are personally aware and which are associated with any part of Your Contributions. 6. You are not expected to provide support for Your Contributions, except to the extent You desire to provide support. You may provide support for free, for a fee, or not at all. Unless required by applicable law or agreed to in writing, You provide Your Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON- INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 7. Should You wish to submit work that is not Your original creation, You may submit it to CMU separately from any Contribution, identifying the complete details of its source and of any license or other restriction (including, but not limited to, related patents, trademarks, and license agreements) of which you are personally aware, and conspicuously marking the work as "Submitted on behalf of a third-party: [named here]". 8. You agree to notify CMU of any facts or circumstances of which you become aware that would make these representations inaccurate in any respect. Please sign: __________________________________ Date: ________________ ================================================ FILE: matlab/eventlog_parser.m ================================================
% EVENTLOG_PARSER Parse a tab-separated event log into per-event-name structs.
%
%   ret = eventlog_parser(eventlogfile)
%
% Input:
%   eventlogfile - path to a tab-delimited file whose rows are
%                  <name> <time> <min> <avg> <max> <total> <rate>
% Output:
%   ret - cell array with one struct per distinct event name (names sorted
%         as returned by UNIQUE). Each struct has fields 'name' (char) and
%         'times', 'minimum', 'average', 'maximum', 'total', 'rate', each a
%         row vector of the values from the rows matching that name, in
%         file order. ret is {} when the file contains no rows.
%
% Raises an error if the file cannot be opened.
function ret = eventlog_parser(eventlogfile)
  f = fopen(eventlogfile);
  if (f == -1)
    % Fail early with a clear message instead of letting textscan choke on -1.
    error('eventlog_parser:openFailed', ...
          'Cannot open event log file: %s', eventlogfile);
  end
  res = textscan(f, '%s %f %f %f %f %f %f', 'Delimiter', '\t');
  fclose(f);
  names = res{1};
  times = res{2};
  minimum = res{3};
  average = res{4};
  maximum = res{5};
  total = res{6};
  rate = res{7};
  uniquenames = unique(names);
  ret = {};
  if (isempty(uniquenames))
    return
  end
  for i = 1:length(uniquenames)
    % Logical mask of the rows belonging to this event name; replaces the
    % previous O(U*N) per-row strcmp scan with one vectorized comparison.
    sel = strcmp(uniquenames{i}, names);
    % textscan yields column vectors; transpose so the fields keep the row
    % layout the old concatenation-based loop produced.
    ret{i} = struct('name', uniquenames{i}, ...
                    'times', times(sel).', ...
                    'minimum', minimum(sel).', ...
                    'average', average(sel).', ...
                    'maximum', maximum(sel).', ...
                    'total', total(sel).', ...
                    'rate', rate(sel).');
  end
end
================================================ FILE: patches/boost.patch ================================================ diff -rupN boost_1_53_0_old/boost/cstdint.hpp boost_1_53_0/boost/cstdint.hpp index ea84b65..697b67b 100644 --- boost_1_53_0_old/boost/cstdint.hpp +++ boost_1_53_0/boost/cstdint.hpp @@ -39,9 +39,15 @@ // Note that GLIBC is a bit inconsistent about whether int64_t is defined or not // depending upon what headers happen to have been included first... // so we disable use of stdint.h when GLIBC does not define __GLIBC_HAVE_LONG_LONG. -// See https://svn.boost.org/trac/boost/ticket/3548 and http://sources.redhat.com/bugzilla/show_bug.cgi?id=10990 +// See issues: +// https://svn.boost.org/trac/boost/ticket/3548 +// http://sources.redhat.com/bugzilla/show_bug.cgi?id=10990 +// https://svn.boost.org/trac/boost/ticket/8973 // -#if defined(BOOST_HAS_STDINT_H) && (!defined(__GLIBC__) || defined(__GLIBC_HAVE_LONG_LONG)) +#if defined(BOOST_HAS_STDINT_H) \ + && (!defined(__GLIBC__) \ + || defined(__GLIBC_HAVE_LONG_LONG) \ + || (defined(__GLIBC__) && ((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ >= 17))))) // The following #include is an implementation artifact; not part of interface. 
# ifdef __hpux ================================================ FILE: patches/libbz2_fpic.patch ================================================ --- Makefile 2010-09-10 18:46:02.000000000 -0400 +++ Makefile 2012-05-31 13:21:15.211233533 -0400 @@ -21,7 +21,7 @@ LDFLAGS= BIGFILES=-D_FILE_OFFSET_BITS=64 -CFLAGS=-Wall -Winline -O2 -g $(BIGFILES) +CFLAGS=-fpic -fPIC -Wall -Winline -O2 -g $(BIGFILES) # Where you want it installed when you do 'make install' PREFIX=/usr/local ================================================ FILE: patches/libevent_clean_and_remap.sh ================================================ #!/bin/bash if [ $# -ne 1 ] then echo "Missing argument: directory where libevent libraries reside" exit 1 fi CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" cd $1 rm -f libevent*.so objcopy --redefine-syms=$CURDIR/libevent_remap_file.txt libevent_pthreads.a objcopy --redefine-syms=$CURDIR/libevent_remap_file.txt libevent.a ================================================ FILE: patches/libevent_remap_file.txt ================================================ accept_socket_cb gl_accept_socket_cb advance_last_with_data.isra.5 gl_advance_last_with_data.isra.5 arc4_count gl_arc4_count arc4rand_lock gl_arc4rand_lock arc4_seeded_ok gl_arc4_seeded_ok arc4_stir gl_arc4_stir arc4_stir_pid gl_arc4_stir_pid be_filter_ctrl gl_be_filter_ctrl be_filter_destruct gl_be_filter_destruct be_filter_disable gl_be_filter_disable be_filter_enable gl_be_filter_enable be_filter_eventcb gl_be_filter_eventcb be_filter_flush gl_be_filter_flush be_filter_process_input gl_be_filter_process_input be_filter_process_output gl_be_filter_process_output be_filter_readcb gl_be_filter_readcb be_filter_writecb gl_be_filter_writecb be_null_filter gl_be_null_filter be_pair_destruct gl_be_pair_destruct be_pair_disable gl_be_pair_disable be_pair_enable gl_be_pair_enable be_pair_flush gl_be_pair_flush be_pair_outbuf_cb gl_be_pair_outbuf_cb be_pair_transfer gl_be_pair_transfer 
be_socket_adj_timeouts gl_be_socket_adj_timeouts be_socket_ctrl gl_be_socket_ctrl be_socket_destruct gl_be_socket_destruct be_socket_disable gl_be_socket_disable be_socket_enable gl_be_socket_enable be_socket_flush gl_be_socket_flush be_underlying_writebuf_full.isra.2 gl_be_underlying_writebuf_full.isra.2 _bev_group_random_element.isra.0 gl__bev_group_random_element.isra.0 _bev_group_refill_callback gl__bev_group_refill_callback _bev_group_suspend_reading gl__bev_group_suspend_reading _bev_group_suspend_writing gl__bev_group_suspend_writing _bev_group_unsuspend_reading gl__bev_group_unsuspend_reading _bev_group_unsuspend_writing gl__bev_group_unsuspend_writing _bev_refill_callback gl__bev_refill_callback bind_socket_ai gl_bind_socket_ai bind_socket gl_bind_socket bracket_addr_ok gl_bracket_addr_ok _bufferevent_add_event gl__bufferevent_add_event bufferevent_add_to_rate_limit_group gl_bufferevent_add_to_rate_limit_group bufferevent_base_set gl_bufferevent_base_set bufferevent_connect_getaddrinfo_cb gl_bufferevent_connect_getaddrinfo_cb _bufferevent_decref_and_unlock gl__bufferevent_decref_and_unlock bufferevent_decref gl_bufferevent_decref _bufferevent_decrement_read_buckets gl__bufferevent_decrement_read_buckets bufferevent_decrement_read_limit gl_bufferevent_decrement_read_limit _bufferevent_decrement_write_buckets gl__bufferevent_decrement_write_buckets bufferevent_decrement_write_limit gl_bufferevent_decrement_write_limit _bufferevent_del_generic_timeout_cbs gl__bufferevent_del_generic_timeout_cbs bufferevent_disable gl_bufferevent_disable bufferevent_disable_hard gl_bufferevent_disable_hard bufferevent_enable gl_bufferevent_enable bufferevent_enable_locking gl_bufferevent_enable_locking bufferevent_filtered_outbuf_cb gl_bufferevent_filtered_outbuf_cb bufferevent_filter_new gl_bufferevent_filter_new bufferevent_flush gl_bufferevent_flush bufferevent_free gl_bufferevent_free _bufferevent_generic_adj_timeouts gl__bufferevent_generic_adj_timeouts 
bufferevent_generic_read_timeout_cb gl_bufferevent_generic_read_timeout_cb bufferevent_generic_write_timeout_cb gl_bufferevent_generic_write_timeout_cb bufferevent_get_base gl_bufferevent_get_base bufferevent_get_enabled gl_bufferevent_get_enabled bufferevent_getfd gl_bufferevent_getfd bufferevent_get_input gl_bufferevent_get_input bufferevent_get_max_to_read gl_bufferevent_get_max_to_read bufferevent_get_max_to_write gl_bufferevent_get_max_to_write bufferevent_get_output gl_bufferevent_get_output bufferevent_get_read_limit gl_bufferevent_get_read_limit _bufferevent_get_read_max gl__bufferevent_get_read_max bufferevent_get_underlying gl_bufferevent_get_underlying bufferevent_get_write_limit gl_bufferevent_get_write_limit _bufferevent_get_write_max gl__bufferevent_get_write_max bufferevent_inbuf_wm_cb gl_bufferevent_inbuf_wm_cb _bufferevent_incref_and_lock gl__bufferevent_incref_and_lock bufferevent_incref gl_bufferevent_incref bufferevent_init_common gl_bufferevent_init_common _bufferevent_init_generic_timeout_cbs gl__bufferevent_init_generic_timeout_cbs bufferevent_lock gl_bufferevent_lock bufferevent_new gl_bufferevent_new bufferevent_ops_filter gl_bufferevent_ops_filter bufferevent_ops_pair gl_bufferevent_ops_pair bufferevent_ops_socket gl_bufferevent_ops_socket bufferevent_pair_elt_new gl_bufferevent_pair_elt_new bufferevent_pair_get_partner gl_bufferevent_pair_get_partner bufferevent_pair_new gl_bufferevent_pair_new bufferevent_priority_set gl_bufferevent_priority_set bufferevent_rate_limit_group_decrement_read gl_bufferevent_rate_limit_group_decrement_read bufferevent_rate_limit_group_decrement_write gl_bufferevent_rate_limit_group_decrement_write bufferevent_rate_limit_group_free gl_bufferevent_rate_limit_group_free bufferevent_rate_limit_group_get_read_limit gl_bufferevent_rate_limit_group_get_read_limit bufferevent_rate_limit_group_get_totals gl_bufferevent_rate_limit_group_get_totals bufferevent_rate_limit_group_get_write_limit 
gl_bufferevent_rate_limit_group_get_write_limit bufferevent_rate_limit_group_new gl_bufferevent_rate_limit_group_new bufferevent_rate_limit_group_reset_totals gl_bufferevent_rate_limit_group_reset_totals bufferevent_rate_limit_group_set_cfg gl_bufferevent_rate_limit_group_set_cfg bufferevent_rate_limit_group_set_min_share gl_bufferevent_rate_limit_group_set_min_share bufferevent_read_buffer gl_bufferevent_read_buffer bufferevent_readcb gl_bufferevent_readcb bufferevent_read gl_bufferevent_read bufferevent_remove_from_rate_limit_group gl_bufferevent_remove_from_rate_limit_group bufferevent_remove_from_rate_limit_group_internal gl_bufferevent_remove_from_rate_limit_group_internal bufferevent_run_deferred_callbacks_locked gl_bufferevent_run_deferred_callbacks_locked bufferevent_run_deferred_callbacks_unlocked gl_bufferevent_run_deferred_callbacks_unlocked _bufferevent_run_eventcb gl__bufferevent_run_eventcb _bufferevent_run_readcb gl__bufferevent_run_readcb _bufferevent_run_writecb gl__bufferevent_run_writecb bufferevent_setcb gl_bufferevent_setcb bufferevent_setfd gl_bufferevent_setfd bufferevent_set_rate_limit gl_bufferevent_set_rate_limit bufferevent_settimeout gl_bufferevent_settimeout bufferevent_set_timeouts gl_bufferevent_set_timeouts bufferevent_setwatermark gl_bufferevent_setwatermark bufferevent_socket_connect gl_bufferevent_socket_connect bufferevent_socket_connect_hostname gl_bufferevent_socket_connect_hostname bufferevent_socket_get_dns_error gl_bufferevent_socket_get_dns_error bufferevent_socket_new gl_bufferevent_socket_new bufferevent_socket_outbuf_cb gl_bufferevent_socket_outbuf_cb bufferevent_suspend_read gl_bufferevent_suspend_read bufferevent_suspend_write gl_bufferevent_suspend_write bufferevent_unlock gl_bufferevent_unlock bufferevent_unsuspend_read gl_bufferevent_unsuspend_read bufferevent_unsuspend_write gl_bufferevent_unsuspend_write bufferevent_write_buffer gl_bufferevent_write_buffer bufferevent_writecb gl_bufferevent_writecb 
bufferevent_write gl_bufferevent_write client_error_phrases gl_client_error_phrases common_timeout_callback gl_common_timeout_callback common_timeout_schedule.isra.23 gl_common_timeout_schedule.isra.23 COPY_CHAIN.part.7 gl_COPY_CHAIN.part.7 CSWTCH.118 gl_CSWTCH.118 CSWTCH.190 gl_CSWTCH.190 CSWTCH.365 gl_CSWTCH.365 CSWTCH.4 gl_CSWTCH.4 current_base gl_current_base debug_cond_wait gl_debug_cond_wait debug_lock_alloc gl_debug_lock_alloc debug_lock_free gl_debug_lock_free debug_lock_lock gl_debug_lock_lock debug_lock_unlock gl_debug_lock_unlock decode_int64_internal.constprop.0 gl_decode_int64_internal.constprop.0 decode_int_internal gl_decode_int_internal decode_tag_internal gl_decode_tag_internal dnsname_to_labels gl_dnsname_to_labels end_of_path gl_end_of_path epoll_apply_one_change.isra.0 gl_epoll_apply_one_change.isra.0 epoll_dealloc gl_epoll_dealloc epoll_dispatch gl_epoll_dispatch epoll_init gl_epoll_init epoll_nochangelist_add gl_epoll_nochangelist_add epoll_nochangelist_del gl_epoll_nochangelist_del epollops_changelist gl_epollops_changelist epollops gl_epollops error_codes.5989 gl_error_codes.5989 evbuffer_add_buffer gl_evbuffer_add_buffer evbuffer_add_cb gl_evbuffer_add_cb evbuffer_add_file gl_evbuffer_add_file evbuffer_add gl_evbuffer_add evbuffer_add_printf gl_evbuffer_add_printf evbuffer_add_reference gl_evbuffer_add_reference evbuffer_add_vprintf gl_evbuffer_add_vprintf evbuffer_cb_clear_flags gl_evbuffer_cb_clear_flags evbuffer_cb_set_flags gl_evbuffer_cb_set_flags evbuffer_chain_align gl_evbuffer_chain_align evbuffer_chain_insert gl_evbuffer_chain_insert evbuffer_chain_new gl_evbuffer_chain_new _evbuffer_chain_pin gl__evbuffer_chain_pin _evbuffer_chain_unpin gl__evbuffer_chain_unpin evbuffer_clear_flags gl_evbuffer_clear_flags evbuffer_commit_space gl_evbuffer_commit_space evbuffer_copyout gl_evbuffer_copyout _evbuffer_decref_and_unlock gl__evbuffer_decref_and_unlock evbuffer_defer_callbacks gl_evbuffer_defer_callbacks evbuffer_deferred_callback 
gl_evbuffer_deferred_callback evbuffer_drain gl_evbuffer_drain evbuffer_enable_locking gl_evbuffer_enable_locking _evbuffer_expand_fast gl__evbuffer_expand_fast evbuffer_expand gl_evbuffer_expand evbuffer_expand_singlechain gl_evbuffer_expand_singlechain evbuffer_find_eol_char gl_evbuffer_find_eol_char evbuffer_find gl_evbuffer_find evbuffer_free gl_evbuffer_free evbuffer_freeze gl_evbuffer_freeze evbuffer_get_contiguous_space gl_evbuffer_get_contiguous_space evbuffer_get_length gl_evbuffer_get_length _evbuffer_incref_and_lock gl__evbuffer_incref_and_lock _evbuffer_incref gl__evbuffer_incref evbuffer_invoke_callbacks gl_evbuffer_invoke_callbacks evbuffer_lock gl_evbuffer_lock evbuffer_new gl_evbuffer_new evbuffer_peek gl_evbuffer_peek evbuffer_prepend_buffer gl_evbuffer_prepend_buffer evbuffer_prepend gl_evbuffer_prepend evbuffer_ptr_memcmp.isra.12 gl_evbuffer_ptr_memcmp.isra.12 evbuffer_ptr_set gl_evbuffer_ptr_set evbuffer_pullup gl_evbuffer_pullup evbuffer_read gl_evbuffer_read evbuffer_readline gl_evbuffer_readline evbuffer_readln gl_evbuffer_readln _evbuffer_read_setup_vecs gl__evbuffer_read_setup_vecs evbuffer_remove_all_callbacks.isra.4 gl_evbuffer_remove_all_callbacks.isra.4 evbuffer_remove_buffer gl_evbuffer_remove_buffer evbuffer_remove_cb_entry gl_evbuffer_remove_cb_entry evbuffer_remove_cb gl_evbuffer_remove_cb evbuffer_remove gl_evbuffer_remove evbuffer_reserve_space gl_evbuffer_reserve_space evbuffer_run_callbacks gl_evbuffer_run_callbacks evbuffer_search_eol gl_evbuffer_search_eol evbuffer_search gl_evbuffer_search evbuffer_search_range gl_evbuffer_search_range evbuffer_setcb gl_evbuffer_setcb evbuffer_set_flags gl_evbuffer_set_flags evbuffer_set_parent gl_evbuffer_set_parent _evbuffer_testing_use_linear_file_access gl__evbuffer_testing_use_linear_file_access _evbuffer_testing_use_mmap gl__evbuffer_testing_use_mmap _evbuffer_testing_use_sendfile gl__evbuffer_testing_use_sendfile evbuffer_unfreeze gl_evbuffer_unfreeze evbuffer_unlock gl_evbuffer_unlock 
evbuffer_write_atmost gl_evbuffer_write_atmost evbuffer_write gl_evbuffer_write evbuffer_write_iovec.isra.14 gl_evbuffer_write_iovec.isra.14 evconnlistener_disable gl_evconnlistener_disable evconnlistener_enable gl_evconnlistener_enable evconnlistener_event_ops gl_evconnlistener_event_ops evconnlistener_free gl_evconnlistener_free evconnlistener_get_base gl_evconnlistener_get_base evconnlistener_get_fd gl_evconnlistener_get_fd evconnlistener_new_bind gl_evconnlistener_new_bind evconnlistener_new gl_evconnlistener_new evconnlistener_set_cb gl_evconnlistener_set_cb evconnlistener_set_error_cb gl_evconnlistener_set_error_cb evdns_add_server_port gl_evdns_add_server_port evdns_add_server_port_with_base gl_evdns_add_server_port_with_base evdns_base_clear_nameservers_and_suspend gl_evdns_base_clear_nameservers_and_suspend evdns_base_count_nameservers gl_evdns_base_count_nameservers evdns_base_free_and_unlock gl_evdns_base_free_and_unlock evdns_base_free gl_evdns_base_free evdns_base_load_hosts gl_evdns_base_load_hosts evdns_base_nameserver_add gl_evdns_base_nameserver_add evdns_base_nameserver_ip_add gl_evdns_base_nameserver_ip_add evdns_base_nameserver_sockaddr_add gl_evdns_base_nameserver_sockaddr_add evdns_base_new gl_evdns_base_new evdns_base_parse_hosts_line.isra.18 gl_evdns_base_parse_hosts_line.isra.18 evdns_base_resolv_conf_parse gl_evdns_base_resolv_conf_parse evdns_base_resolve_ipv4 gl_evdns_base_resolve_ipv4 evdns_base_resolve_ipv6 gl_evdns_base_resolve_ipv6 evdns_base_resolve_reverse gl_evdns_base_resolve_reverse evdns_base_resolve_reverse_ipv6 gl_evdns_base_resolve_reverse_ipv6 evdns_base_resume gl_evdns_base_resume evdns_base_search_add gl_evdns_base_search_add evdns_base_search_clear gl_evdns_base_search_clear evdns_base_search_ndots_set gl_evdns_base_search_ndots_set evdns_base_set_max_requests_inflight gl_evdns_base_set_max_requests_inflight evdns_base_set_option gl_evdns_base_set_option evdns_base_set_option_impl gl_evdns_base_set_option_impl 
evdns_cancel_request gl_evdns_cancel_request evdns_clear_nameservers_and_suspend gl_evdns_clear_nameservers_and_suspend evdns_close_server_port gl_evdns_close_server_port evdns_count_nameservers gl_evdns_count_nameservers evdns_err_to_string gl_evdns_err_to_string evdns_getaddrinfo_cancel gl_evdns_getaddrinfo_cancel evdns_getaddrinfo gl_evdns_getaddrinfo evdns_getaddrinfo_gotresolve gl_evdns_getaddrinfo_gotresolve evdns_getaddrinfo_impl gl_evdns_getaddrinfo_impl evdns_getaddrinfo_timeout_cb gl_evdns_getaddrinfo_timeout_cb evdns_get_global_base gl_evdns_get_global_base evdns_init gl_evdns_init evdns_log_fn gl_evdns_log_fn _evdns_log gl__evdns_log evdns_nameserver_add gl_evdns_nameserver_add _evdns_nameserver_add_impl gl__evdns_nameserver_add_impl evdns_nameserver_ip_add gl_evdns_nameserver_ip_add evdns_request_insert gl_evdns_request_insert evdns_request_remove gl_evdns_request_remove evdns_requests_pump_waiting_queue gl_evdns_requests_pump_waiting_queue evdns_request_timeout_callback gl_evdns_request_timeout_callback evdns_request_transmit gl_evdns_request_transmit evdns_resolv_conf_parse gl_evdns_resolv_conf_parse evdns_resolve_ipv4 gl_evdns_resolve_ipv4 evdns_resolve_ipv6 gl_evdns_resolve_ipv6 evdns_resolve_reverse gl_evdns_resolve_reverse evdns_resolve_reverse_ipv6 gl_evdns_resolve_reverse_ipv6 evdns_resume gl_evdns_resume evdns_search_add gl_evdns_search_add evdns_search_clear gl_evdns_search_clear evdns_search_ndots_set gl_evdns_search_ndots_set evdns_server_request_add_aaaa_reply gl_evdns_server_request_add_aaaa_reply evdns_server_request_add_a_reply gl_evdns_server_request_add_a_reply evdns_server_request_add_cname_reply gl_evdns_server_request_add_cname_reply evdns_server_request_add_ptr_reply gl_evdns_server_request_add_ptr_reply evdns_server_request_add_reply gl_evdns_server_request_add_reply evdns_server_request_drop gl_evdns_server_request_drop evdns_server_request_format_response gl_evdns_server_request_format_response 
evdns_server_request_get_requesting_addr gl_evdns_server_request_get_requesting_addr evdns_server_request_respond gl_evdns_server_request_respond evdns_server_request_set_flags gl_evdns_server_request_set_flags evdns_set_log_fn gl_evdns_set_log_fn evdns_set_option gl_evdns_set_option evdns_set_random_bytes_fn gl_evdns_set_random_bytes_fn evdns_set_transaction_id_fn gl_evdns_set_transaction_id_fn evdns_shutdown gl_evdns_shutdown evdns_transmit gl_evdns_transmit event_active gl_event_active event_active_nolock gl_event_active_nolock event_add gl_event_add event_add_internal.part.22 gl_event_add_internal.part.22 event_assign gl_event_assign event_base_add_virtual gl_event_base_add_virtual event_base_assert_ok gl_event_base_assert_ok event_base_del_virtual gl_event_base_del_virtual event_base_dispatch gl_event_base_dispatch event_base_dump_events gl_event_base_dump_events event_base_free gl_event_base_free event_base_get_deferred_cb_queue gl_event_base_get_deferred_cb_queue event_base_get_features gl_event_base_get_features event_base_get_method gl_event_base_get_method event_base_gettimeofday_cached gl_event_base_gettimeofday_cached event_base_got_break gl_event_base_got_break event_base_got_exit gl_event_base_got_exit event_base_init_common_timeout gl_event_base_init_common_timeout event_base_loopbreak gl_event_base_loopbreak event_base_loopexit gl_event_base_loopexit event_base_loop gl_event_base_loop event_base_new gl_event_base_new event_base_new_with_config gl_event_base_new_with_config event_base_once gl_event_base_once event_base_priority_init gl_event_base_priority_init event_base_set gl_event_base_set event_base_start_iocp gl_event_base_start_iocp event_base_stop_iocp gl_event_base_stop_iocp event_changelist_add gl_event_changelist_add event_changelist_del gl_event_changelist_del event_changelist_freemem gl_event_changelist_freemem event_changelist_get_or_construct gl_event_changelist_get_or_construct event_changelist_init gl_event_changelist_init 
event_changelist_remove_all gl_event_changelist_remove_all event_config_avoid_method gl_event_config_avoid_method event_config_free gl_event_config_free event_config_new gl_event_config_new event_config_require_features gl_event_config_require_features event_config_set_flag gl_event_config_set_flag event_config_set_num_cpus_hint gl_event_config_set_num_cpus_hint event_debug_map_HT_CLEAR gl_event_debug_map_HT_CLEAR event_debug_map_HT_GROW gl_event_debug_map_HT_GROW _event_debug_map_HT_REP_IS_BAD gl__event_debug_map_HT_REP_IS_BAD _event_debug_map_lock gl__event_debug_map_lock event_debug_map_PRIMES gl_event_debug_map_PRIMES _event_debug_mode_on gl__event_debug_mode_on event_debug_mode_too_late gl_event_debug_mode_too_late event_debug_unassign gl_event_debug_unassign _event_debugx gl__event_debugx event_deferred_cb_cancel gl_event_deferred_cb_cancel event_deferred_cb_init gl_event_deferred_cb_init event_deferred_cb_queue_init gl_event_deferred_cb_queue_init event_deferred_cb_schedule gl_event_deferred_cb_schedule event_del gl_event_del event_del_internal.part.20 gl_event_del_internal.part.20 event_dispatch gl_event_dispatch event_enable_debug_mode gl_event_enable_debug_mode event_err gl_event_err event_errx gl_event_errx event_exit gl_event_exit event_free gl_event_free event_get_assignment gl_event_get_assignment event_get_base gl_event_get_base event_get_callback_arg gl_event_get_callback_arg event_get_callback gl_event_get_callback event_get_events gl_event_get_events event_get_fd gl_event_get_fd event_get_method gl_event_get_method event_get_struct_event_size gl_event_get_struct_event_size event_get_supported_methods gl_event_get_supported_methods event_get_version gl_event_get_version event_get_version_number gl_event_get_version_number event_global_current_base_ gl_event_global_current_base_ event_global_setup_locks_ gl_event_global_setup_locks_ event_init gl_event_init event_initialized gl_event_initialized event_listener_destroy gl_event_listener_destroy 
event_listener_disable gl_event_listener_disable event_listener_enable gl_event_listener_enable event_listener_getbase gl_event_listener_getbase event_listener_getfd gl_event_listener_getfd event_loopbreak gl_event_loopbreak event_loopexit_cb gl_event_loopexit_cb event_loopexit gl_event_loopexit event_loop gl_event_loop event_mm_calloc_ gl_event_mm_calloc_ event_mm_free_ gl_event_mm_free_ event_mm_malloc_ gl_event_mm_malloc_ event_mm_realloc_ gl_event_mm_realloc_ event_mm_strdup_ gl_event_mm_strdup_ event_msgx gl_event_msgx event_new gl_event_new event_once_cb gl_event_once_cb event_once gl_event_once eventops gl_eventops event_pending gl_event_pending event_priority_init gl_event_priority_init event_priority_set gl_event_priority_set event_queue_insert gl_event_queue_insert event_queue_remove gl_event_queue_remove event_reinit gl_event_reinit event_set_fatal_callback gl_event_set_fatal_callback event_set gl_event_set event_set_log_callback gl_event_set_log_callback event_set_mem_functions gl_event_set_mem_functions event_sock_err gl_event_sock_err event_sock_warn gl_event_sock_warn _event_strlcpy gl__event_strlcpy event_warn gl_event_warn event_warnx gl_event_warnx evhttp_accept_socket gl_evhttp_accept_socket evhttp_accept_socket_with_handle gl_evhttp_accept_socket_with_handle evhttp_add_header gl_evhttp_add_header evhttp_add_header_internal.isra.3 gl_evhttp_add_header_internal.isra.3 evhttp_add_server_alias gl_evhttp_add_server_alias evhttp_add_virtual_host gl_evhttp_add_virtual_host evhttp_associate_new_request_with_connection gl_evhttp_associate_new_request_with_connection evhttp_bind_listener gl_evhttp_bind_listener evhttp_bind_socket gl_evhttp_bind_socket evhttp_bind_socket_with_handle gl_evhttp_bind_socket_with_handle evhttp_bound_socket_get_fd gl_evhttp_bound_socket_get_fd evhttp_bound_socket_get_listener gl_evhttp_bound_socket_get_listener evhttp_cancel_request gl_evhttp_cancel_request evhttp_clear_headers gl_evhttp_clear_headers evhttp_connection_base_new 
gl_evhttp_connection_base_new evhttp_connection_cb_cleanup gl_evhttp_connection_cb_cleanup evhttp_connection_cb gl_evhttp_connection_cb evhttp_connection_connect gl_evhttp_connection_connect evhttp_connection_connect.part.25 gl_evhttp_connection_connect.part.25 evhttp_connection_done gl_evhttp_connection_done evhttp_connection_fail gl_evhttp_connection_fail evhttp_connection_free gl_evhttp_connection_free evhttp_connection_get_base gl_evhttp_connection_get_base evhttp_connection_get_bufferevent gl_evhttp_connection_get_bufferevent evhttp_connection_get_peer gl_evhttp_connection_get_peer evhttp_connection_new gl_evhttp_connection_new evhttp_connection_reset gl_evhttp_connection_reset evhttp_connection_retry gl_evhttp_connection_retry evhttp_connection_set_base gl_evhttp_connection_set_base evhttp_connection_set_closecb gl_evhttp_connection_set_closecb evhttp_connection_set_local_address gl_evhttp_connection_set_local_address evhttp_connection_set_local_port gl_evhttp_connection_set_local_port evhttp_connection_set_max_body_size gl_evhttp_connection_set_max_body_size evhttp_connection_set_max_headers_size gl_evhttp_connection_set_max_headers_size evhttp_connection_set_retries gl_evhttp_connection_set_retries evhttp_connection_set_timeout gl_evhttp_connection_set_timeout evhttp_decode_uri gl_evhttp_decode_uri evhttp_decode_uri_internal gl_evhttp_decode_uri_internal evhttp_deferred_read_cb gl_evhttp_deferred_read_cb evhttp_del_accept_socket gl_evhttp_del_accept_socket evhttp_del_cb gl_evhttp_del_cb evhttp_encode_uri gl_evhttp_encode_uri evhttp_error_cb gl_evhttp_error_cb evhttp_find_alias gl_evhttp_find_alias evhttp_find_header gl_evhttp_find_header evhttp_free gl_evhttp_free evhttp_get_body gl_evhttp_get_body evhttp_handle_request gl_evhttp_handle_request evhttp_htmlescape gl_evhttp_htmlescape evhttp_is_connection_close gl_evhttp_is_connection_close evhttp_is_connection_keepalive gl_evhttp_is_connection_keepalive evhttp_make_header gl_evhttp_make_header 
evhttp_make_request gl_evhttp_make_request evhttp_new gl_evhttp_new evhttp_new_object gl_evhttp_new_object evhttp_parse_firstline gl_evhttp_parse_firstline evhttp_parse_headers gl_evhttp_parse_headers evhttp_parse_query gl_evhttp_parse_query evhttp_parse_query_impl gl_evhttp_parse_query_impl evhttp_parse_query_str gl_evhttp_parse_query_str evhttp_read_body gl_evhttp_read_body evhttp_read_cb gl_evhttp_read_cb evhttp_read_header gl_evhttp_read_header evhttp_read_trailer gl_evhttp_read_trailer evhttp_remove_header gl_evhttp_remove_header evhttp_remove_server_alias gl_evhttp_remove_server_alias evhttp_remove_virtual_host gl_evhttp_remove_virtual_host evhttp_request_dispatch gl_evhttp_request_dispatch evhttp_request_free gl_evhttp_request_free evhttp_request_free.part.23 gl_evhttp_request_free.part.23 evhttp_request_get_command gl_evhttp_request_get_command evhttp_request_get_connection gl_evhttp_request_get_connection evhttp_request_get_evhttp_uri gl_evhttp_request_get_evhttp_uri evhttp_request_get_host gl_evhttp_request_get_host evhttp_request_get_input_buffer gl_evhttp_request_get_input_buffer evhttp_request_get_input_headers gl_evhttp_request_get_input_headers evhttp_request_get_output_buffer gl_evhttp_request_get_output_buffer evhttp_request_get_output_headers gl_evhttp_request_get_output_headers evhttp_request_get_response_code gl_evhttp_request_get_response_code evhttp_request_get_uri gl_evhttp_request_get_uri evhttp_request_is_owned gl_evhttp_request_is_owned evhttp_request_new gl_evhttp_request_new evhttp_request_own gl_evhttp_request_own evhttp_request_set_chunked_cb gl_evhttp_request_set_chunked_cb evhttp_response_code gl_evhttp_response_code evhttp_send_continue_done gl_evhttp_send_continue_done evhttp_send_done gl_evhttp_send_done evhttp_send_error gl_evhttp_send_error evhttp_send_page gl_evhttp_send_page evhttp_send.part.27 gl_evhttp_send.part.27 evhttp_send_reply_chunk gl_evhttp_send_reply_chunk evhttp_send_reply_end gl_evhttp_send_reply_end 
evhttp_send_reply gl_evhttp_send_reply evhttp_send_reply_start gl_evhttp_send_reply_start evhttp_set_allowed_methods gl_evhttp_set_allowed_methods evhttp_set_cb gl_evhttp_set_cb evhttp_set_gencb gl_evhttp_set_gencb evhttp_set_max_body_size gl_evhttp_set_max_body_size evhttp_set_max_headers_size gl_evhttp_set_max_headers_size evhttp_set_timeout gl_evhttp_set_timeout evhttp_start gl_evhttp_start evhttp_start_read gl_evhttp_start_read evhttp_uridecode gl_evhttp_uridecode evhttp_uriencode gl_evhttp_uriencode evhttp_uri_free gl_evhttp_uri_free evhttp_uri_get_fragment gl_evhttp_uri_get_fragment evhttp_uri_get_host gl_evhttp_uri_get_host evhttp_uri_get_path gl_evhttp_uri_get_path evhttp_uri_get_port gl_evhttp_uri_get_port evhttp_uri_get_query gl_evhttp_uri_get_query evhttp_uri_get_scheme gl_evhttp_uri_get_scheme evhttp_uri_get_userinfo gl_evhttp_uri_get_userinfo evhttp_uri_join gl_evhttp_uri_join evhttp_uri_new gl_evhttp_uri_new evhttp_uri_parse gl_evhttp_uri_parse evhttp_uri_parse_with_flags gl_evhttp_uri_parse_with_flags evhttp_uri_set_flags gl_evhttp_uri_set_flags evhttp_uri_set_fragment gl_evhttp_uri_set_fragment evhttp_uri_set_host gl_evhttp_uri_set_host evhttp_uri_set_path gl_evhttp_uri_set_path evhttp_uri_set_port gl_evhttp_uri_set_port evhttp_uri_set_query gl_evhttp_uri_set_query evhttp_uri_set_scheme gl_evhttp_uri_set_scheme evhttp_uri_set_userinfo gl_evhttp_uri_set_userinfo evhttp_write_buffer.constprop.31 gl_evhttp_write_buffer.constprop.31 evhttp_write_cb gl_evhttp_write_cb evhttp_write_connectioncb gl_evhttp_write_connectioncb evmap_check_integrity gl_evmap_check_integrity evmap_io_active gl_evmap_io_active evmap_io_add gl_evmap_io_add evmap_io_clear gl_evmap_io_clear evmap_io_del gl_evmap_io_del evmap_io_get_fdinfo gl_evmap_io_get_fdinfo evmap_io_initmap gl_evmap_io_initmap evmap_make_space.constprop.1 gl_evmap_make_space.constprop.1 evmap_signal_active gl_evmap_signal_active evmap_signal_add gl_evmap_signal_add evmap_signal_clear gl_evmap_signal_clear 
evmap_signal_del gl_evmap_signal_del evmap_signal_initmap gl_evmap_signal_initmap evrpc_add_hook gl_evrpc_add_hook evrpc_construct_uri gl_evrpc_construct_uri evrpc_free gl_evrpc_free evrpc_get_reply gl_evrpc_get_reply evrpc_get_request gl_evrpc_get_request evrpc_hook_add_meta gl_evrpc_hook_add_meta evrpc_hook_find_meta gl_evrpc_hook_find_meta evrpc_hook_get_connection gl_evrpc_hook_get_connection evrpc_hook_meta_new gl_evrpc_hook_meta_new evrpc_init gl_evrpc_init evrpc_make_request_ctx gl_evrpc_make_request_ctx evrpc_make_request gl_evrpc_make_request evrpc_meta_data_free gl_evrpc_meta_data_free evrpc_pause_request gl_evrpc_pause_request evrpc_pool_add_connection gl_evrpc_pool_add_connection evrpc_pool_free gl_evrpc_pool_free evrpc_pool_new gl_evrpc_pool_new evrpc_pool_remove_connection gl_evrpc_pool_remove_connection evrpc_pool_schedule gl_evrpc_pool_schedule evrpc_pool_set_timeout gl_evrpc_pool_set_timeout evrpc_register_generic gl_evrpc_register_generic evrpc_register_rpc gl_evrpc_register_rpc evrpc_remove_hook gl_evrpc_remove_hook evrpc_reply_done_closure gl_evrpc_reply_done_closure evrpc_reply_done gl_evrpc_reply_done evrpc_reqstate_free gl_evrpc_reqstate_free evrpc_request_cb_closure gl_evrpc_request_cb_closure evrpc_request_cb gl_evrpc_request_cb evrpc_request_done_closure gl_evrpc_request_done_closure evrpc_request_done gl_evrpc_request_done evrpc_request_get_pool gl_evrpc_request_get_pool evrpc_request_set_cb gl_evrpc_request_set_cb evrpc_request_set_pool gl_evrpc_request_set_pool evrpc_request_timeout gl_evrpc_request_timeout evrpc_request_wrapper_free gl_evrpc_request_wrapper_free evrpc_resume_request gl_evrpc_resume_request evrpc_schedule_request_closure gl_evrpc_schedule_request_closure evrpc_schedule_request gl_evrpc_schedule_request evrpc_send_request_generic gl_evrpc_send_request_generic evrpc_unregister_rpc gl_evrpc_unregister_rpc evsig_add gl_evsig_add evsig_base_fd gl_evsig_base_fd evsig_base gl_evsig_base evsig_base_lock gl_evsig_base_lock 
evsig_base_n_signals_added gl_evsig_base_n_signals_added evsig_cb gl_evsig_cb evsig_dealloc gl_evsig_dealloc evsig_del gl_evsig_del evsig_global_setup_locks_ gl_evsig_global_setup_locks_ evsig_handler gl_evsig_handler evsig_init gl_evsig_init evsigops gl_evsigops _evsig_restore_handler gl__evsig_restore_handler evsig_set_base gl_evsig_set_base _evsig_set_handler gl__evsig_set_handler evtag_consume gl_evtag_consume evtag_decode_int64 gl_evtag_decode_int64 evtag_decode_int gl_evtag_decode_int evtag_decode_tag gl_evtag_decode_tag evtag_encode_int64 gl_evtag_encode_int64 evtag_encode_int gl_evtag_encode_int evtag_encode_tag gl_evtag_encode_tag evtag_init gl_evtag_init evtag_marshal_buffer gl_evtag_marshal_buffer evtag_marshal gl_evtag_marshal evtag_marshal_int64 gl_evtag_marshal_int64 evtag_marshal_int gl_evtag_marshal_int evtag_marshal_string gl_evtag_marshal_string evtag_marshal_timeval gl_evtag_marshal_timeval evtag_payload_length gl_evtag_payload_length evtag_peek gl_evtag_peek evtag_peek_length gl_evtag_peek_length evtag_unmarshal_fixed gl_evtag_unmarshal_fixed evtag_unmarshal gl_evtag_unmarshal evtag_unmarshal_header gl_evtag_unmarshal_header evtag_unmarshal_int64 gl_evtag_unmarshal_int64 evtag_unmarshal_int gl_evtag_unmarshal_int evtag_unmarshal_string gl_evtag_unmarshal_string evtag_unmarshal_timeval gl_evtag_unmarshal_timeval _evthread_cond_fns gl__evthread_cond_fns _evthread_debug_get_real_lock gl__evthread_debug_get_real_lock evthread_debug_lock_mark_locked.isra.0 gl_evthread_debug_lock_mark_locked.isra.0 evthread_debug_lock_mark_unlocked gl_evthread_debug_lock_mark_unlocked evthread_enable_lock_debuging gl_evthread_enable_lock_debuging _evthread_id_fn gl__evthread_id_fn _evthread_is_debug_lock_held gl__evthread_is_debug_lock_held _evthread_lock_debugging_enabled gl__evthread_lock_debugging_enabled _evthread_lock_fns gl__evthread_lock_fns evthread_make_base_notifiable gl_evthread_make_base_notifiable evthread_make_base_notifiable.part.28 
gl_evthread_make_base_notifiable.part.28 evthread_notify_base_default gl_evthread_notify_base_default evthread_notify_base_eventfd gl_evthread_notify_base_eventfd evthread_notify_base gl_evthread_notify_base evthread_notify_drain_default gl_evthread_notify_drain_default evthread_notify_drain_eventfd gl_evthread_notify_drain_eventfd evthread_set_condition_callbacks gl_evthread_set_condition_callbacks evthread_set_id_callback gl_evthread_set_id_callback evthread_set_lock_callbacks gl_evthread_set_lock_callbacks evthread_setup_global_lock_ gl_evthread_setup_global_lock_ ev_token_bucket_cfg_free gl_ev_token_bucket_cfg_free ev_token_bucket_cfg_new gl_ev_token_bucket_cfg_new ev_token_bucket_get_tick gl_ev_token_bucket_get_tick ev_token_bucket_init gl_ev_token_bucket_init ev_token_bucket_update gl_ev_token_bucket_update evutil_addrinfo_append gl_evutil_addrinfo_append evutil_adjust_hints_for_addrconfig gl_evutil_adjust_hints_for_addrconfig evutil_ascii_strcasecmp gl_evutil_ascii_strcasecmp evutil_ascii_strncasecmp gl_evutil_ascii_strncasecmp evutil_closesocket gl_evutil_closesocket evutil_ersatz_socketpair gl_evutil_ersatz_socketpair evutil_format_sockaddr_port gl_evutil_format_sockaddr_port evutil_freeaddrinfo gl_evutil_freeaddrinfo evutil_gai_strerror gl_evutil_gai_strerror evutil_getaddrinfo_async gl_evutil_getaddrinfo_async evutil_getaddrinfo_common gl_evutil_getaddrinfo_common evutil_getaddrinfo gl_evutil_getaddrinfo evutil_getaddrinfo_infer_protocols.isra.0 gl_evutil_getaddrinfo_infer_protocols.isra.0 evutil_getenv gl_evutil_getenv evutil_hex_char_to_int gl_evutil_hex_char_to_int evutil_inet_ntop gl_evutil_inet_ntop evutil_inet_ntop.part.4 gl_evutil_inet_ntop.part.4 evutil_inet_pton gl_evutil_inet_pton EVUTIL_ISALNUM gl_EVUTIL_ISALNUM EVUTIL_ISALNUM_TABLE gl_EVUTIL_ISALNUM_TABLE EVUTIL_ISALPHA gl_EVUTIL_ISALPHA EVUTIL_ISALPHA_TABLE gl_EVUTIL_ISALPHA_TABLE EVUTIL_ISDIGIT gl_EVUTIL_ISDIGIT EVUTIL_ISDIGIT_TABLE gl_EVUTIL_ISDIGIT_TABLE EVUTIL_ISLOWER gl_EVUTIL_ISLOWER 
EVUTIL_ISLOWER_TABLE gl_EVUTIL_ISLOWER_TABLE EVUTIL_ISPRINT gl_EVUTIL_ISPRINT EVUTIL_ISPRINT_TABLE gl_EVUTIL_ISPRINT_TABLE EVUTIL_ISSPACE gl_EVUTIL_ISSPACE EVUTIL_ISSPACE_TABLE gl_EVUTIL_ISSPACE_TABLE EVUTIL_ISUPPER gl_EVUTIL_ISUPPER EVUTIL_ISUPPER_TABLE gl_EVUTIL_ISUPPER_TABLE EVUTIL_ISXDIGIT gl_EVUTIL_ISXDIGIT EVUTIL_ISXDIGIT_TABLE gl_EVUTIL_ISXDIGIT_TABLE evutil_make_listen_socket_reuseable gl_evutil_make_listen_socket_reuseable evutil_make_socket_closeonexec gl_evutil_make_socket_closeonexec evutil_make_socket_nonblocking gl_evutil_make_socket_nonblocking evutil_new_addrinfo gl_evutil_new_addrinfo evutil_open_closeonexec gl_evutil_open_closeonexec evutil_parse_sockaddr_port gl_evutil_parse_sockaddr_port evutil_read_file gl_evutil_read_file evutil_secure_rng_add_bytes gl_evutil_secure_rng_add_bytes evutil_secure_rng_get_bytes gl_evutil_secure_rng_get_bytes evutil_secure_rng_global_setup_locks_ gl_evutil_secure_rng_global_setup_locks_ evutil_secure_rng_init gl_evutil_secure_rng_init evutil_set_evdns_getaddrinfo_fn gl_evutil_set_evdns_getaddrinfo_fn evutil_snprintf gl_evutil_snprintf evutil_sockaddr_cmp gl_evutil_sockaddr_cmp evutil_sockaddr_is_loopback gl_evutil_sockaddr_is_loopback evutil_socket_connect gl_evutil_socket_connect evutil_socket_finished_connecting gl_evutil_socket_finished_connecting evutil_socketpair gl_evutil_socketpair evutil_strtoll gl_evutil_strtoll EVUTIL_TOLOWER gl_EVUTIL_TOLOWER EVUTIL_TOLOWER_TABLE gl_EVUTIL_TOLOWER_TABLE EVUTIL_TOUPPER gl_EVUTIL_TOUPPER EVUTIL_TOUPPER_TABLE gl_EVUTIL_TOUPPER_TABLE evutil_tv_to_msec gl_evutil_tv_to_msec evutil_vsnprintf gl_evutil_vsnprintf _evutil_weakrand gl__evutil_weakrand fatal_fn gl_fatal_fn filenames.5800 gl_filenames.5800 free_getaddrinfo_request gl_free_getaddrinfo_request __func__.4053 gl___func__.4053 __func__.4058 gl___func__.4058 __func__.4066 gl___func__.4066 __func__.4071 gl___func__.4071 __func__.4085 gl___func__.4085 __func__.4104 gl___func__.4104 __func__.4489 gl___func__.4489 
__func__.4709 gl___func__.4709 __func__.4844 gl___func__.4844 __func__.4877 gl___func__.4877 __func__.4950 gl___func__.4950 __func__.5088 gl___func__.5088 __func__.5479 gl___func__.5479 __func__.5538 gl___func__.5538 __func__.5553 gl___func__.5553 __func__.5556 gl___func__.5556 __func__.5563 gl___func__.5563 __func__.5568 gl___func__.5568 __func__.5605 gl___func__.5605 __func__.5609 gl___func__.5609 __func__.5624 gl___func__.5624 __func__.5634 gl___func__.5634 __func__.5654 gl___func__.5654 __func__.5665 gl___func__.5665 __func__.5670 gl___func__.5670 __func__.5671 gl___func__.5671 __func__.5675 gl___func__.5675 __func__.5679 gl___func__.5679 __func__.5683 gl___func__.5683 __func__.5685 gl___func__.5685 __func__.5692 gl___func__.5692 __func__.5693 gl___func__.5693 __func__.5711 gl___func__.5711 __func__.5713 gl___func__.5713 __func__.5727 gl___func__.5727 __func__.5733 gl___func__.5733 __func__.5743 gl___func__.5743 __func__.5744 gl___func__.5744 __func__.5754 gl___func__.5754 __func__.5775 gl___func__.5775 __func__.5885 gl___func__.5885 __func__.5899 gl___func__.5899 __func__.5905 gl___func__.5905 __func__.5913 gl___func__.5913 __func__.5923 gl___func__.5923 __func__.5938 gl___func__.5938 __func__.5943 gl___func__.5943 __func__.5947 gl___func__.5947 __func__.5972 gl___func__.5972 __func__.5980 gl___func__.5980 __func__.5990 gl___func__.5990 __func__.6040 gl___func__.6040 __func__.6062 gl___func__.6062 __func__.6080 gl___func__.6080 __func__.6128 gl___func__.6128 __func__.6129 gl___func__.6129 __func__.6135 gl___func__.6135 __func__.6154 gl___func__.6154 __func__.6162 gl___func__.6162 __func__.6173 gl___func__.6173 __func__.6177 gl___func__.6177 __func__.6186 gl___func__.6186 __func__.6188 gl___func__.6188 __func__.6192 gl___func__.6192 __func__.6195 gl___func__.6195 __func__.6200 gl___func__.6200 __func__.6206 gl___func__.6206 __func__.6212 gl___func__.6212 __func__.6213 gl___func__.6213 __func__.6214 gl___func__.6214 __func__.6219 gl___func__.6219 __func__.6223 
gl___func__.6223 __func__.6234 gl___func__.6234 __func__.6247 gl___func__.6247 __func__.6261 gl___func__.6261 __func__.6263 gl___func__.6263 __func__.6286 gl___func__.6286 __func__.6287 gl___func__.6287 __func__.6294 gl___func__.6294 __func__.6300 gl___func__.6300 __func__.6302 gl___func__.6302 __func__.6309 gl___func__.6309 __func__.6315 gl___func__.6315 __func__.6320 gl___func__.6320 __func__.6325 gl___func__.6325 __func__.6329 gl___func__.6329 __func__.6354 gl___func__.6354 __func__.6358 gl___func__.6358 __func__.6362 gl___func__.6362 __func__.6369 gl___func__.6369 __func__.6374 gl___func__.6374 __func__.6379 gl___func__.6379 __func__.6384 gl___func__.6384 __func__.6404 gl___func__.6404 __func__.6414 gl___func__.6414 __func__.6430 gl___func__.6430 __func__.6450 gl___func__.6450 __func__.6452 gl___func__.6452 __func__.6460 gl___func__.6460 __func__.6467 gl___func__.6467 __func__.6469 gl___func__.6469 __func__.6481 gl___func__.6481 __func__.6489 gl___func__.6489 __func__.6498 gl___func__.6498 __func__.6506 gl___func__.6506 __func__.6509 gl___func__.6509 __func__.6511 gl___func__.6511 __func__.6515 gl___func__.6515 __func__.6516 gl___func__.6516 __func__.6524 gl___func__.6524 __func__.6533 gl___func__.6533 __func__.6539 gl___func__.6539 __func__.6540 gl___func__.6540 __func__.6542 gl___func__.6542 __func__.6548 gl___func__.6548 __func__.6554 gl___func__.6554 __func__.6559 gl___func__.6559 __func__.6560 gl___func__.6560 __func__.6565 gl___func__.6565 __func__.6566 gl___func__.6566 __func__.6572 gl___func__.6572 __func__.6578 gl___func__.6578 __func__.6585 gl___func__.6585 __func__.6594 gl___func__.6594 __func__.6603 gl___func__.6603 __func__.6612 gl___func__.6612 __func__.6616 gl___func__.6616 __func__.6621 gl___func__.6621 __func__.6623 gl___func__.6623 __func__.6624 gl___func__.6624 __func__.6631 gl___func__.6631 __func__.6640 gl___func__.6640 __func__.6663 gl___func__.6663 __func__.6666 gl___func__.6666 __func__.6675 gl___func__.6675 __func__.6680 
gl___func__.6680 __func__.6685 gl___func__.6685 __func__.6698 gl___func__.6698 __func__.6701 gl___func__.6701 __func__.6709 gl___func__.6709 __func__.6710 gl___func__.6710 __func__.6714 gl___func__.6714 __func__.6719 gl___func__.6719 __func__.6720 gl___func__.6720 __func__.6726 gl___func__.6726 __func__.6757 gl___func__.6757 __func__.6768 gl___func__.6768 __func__.6787 gl___func__.6787 __func__.6802 gl___func__.6802 __func__.6825 gl___func__.6825 __func__.6831 gl___func__.6831 __func__.6835 gl___func__.6835 __func__.6836 gl___func__.6836 __func__.6839 gl___func__.6839 __func__.6842 gl___func__.6842 __func__.6865 gl___func__.6865 __func__.6880 gl___func__.6880 __func__.6890 gl___func__.6890 __func__.6900 gl___func__.6900 __func__.6906 gl___func__.6906 __func__.6911 gl___func__.6911 __func__.6939 gl___func__.6939 __func__.6960 gl___func__.6960 __func__.6982 gl___func__.6982 __func__.7005 gl___func__.7005 __func__.7025 gl___func__.7025 __func__.7036 gl___func__.7036 __func__.7049 gl___func__.7049 __func__.7054 gl___func__.7054 __func__.7059 gl___func__.7059 __func__.7105 gl___func__.7105 __func__.7142 gl___func__.7142 __func__.7156 gl___func__.7156 __func__.7171 gl___func__.7171 __func__.7219 gl___func__.7219 __func__.7249 gl___func__.7249 __func__.7285 gl___func__.7285 __func__.7334 gl___func__.7334 __func__.7371 gl___func__.7371 __func__.7383 gl___func__.7383 __func__.7392 gl___func__.7392 __func__.7416 gl___func__.7416 __func__.7423 gl___func__.7423 __func__.7438 gl___func__.7438 __func__.7450 gl___func__.7450 __func__.7493 gl___func__.7493 __func__.7532 gl___func__.7532 __func__.7540 gl___func__.7540 __func__.7554 gl___func__.7554 __func__.7666 gl___func__.7666 __func__.7698 gl___func__.7698 __func__.7790 gl___func__.7790 __func__.7822 gl___func__.7822 __func__.7860 gl___func__.7860 __func__.7893 gl___func__.7893 __func__.7901 gl___func__.7901 __func__.7955 gl___func__.7955 __func__.8001 gl___func__.8001 __func__.8054 gl___func__.8054 __func__.8094 
gl___func__.8094 __func__.8099 gl___func__.8099 __func__.8104 gl___func__.8104 __func__.8113 gl___func__.8113 __func__.8118 gl___func__.8118 __func__.8123 gl___func__.8123 gettime gl_gettime global_debug_map gl_global_debug_map had_ipv4_address gl_had_ipv4_address had_ipv6_address gl_had_ipv6_address informational_phrases gl_informational_phrases .LC24 gl_.LC24 .LC7 gl_.LC7 listener_decref_and_unlock gl_listener_decref_and_unlock listener_read_cb gl_listener_read_cb log_fn gl_log_fn LOOPBACK_S6.5935 gl_LOOPBACK_S6.5935 methods.6197 gl_methods.6197 _mm_free_fn gl__mm_free_fn _mm_malloc_fn gl__mm_malloc_fn _mm_realloc_fn gl__mm_realloc_fn name_from_addr.constprop.29 gl_name_from_addr.constprop.29 name_parse gl_name_parse nameserver_failed gl_nameserver_failed nameserver_pick gl_nameserver_pick nameserver_probe_callback gl_nameserver_probe_callback nameserver_prod_callback gl_nameserver_prod_callback nameserver_read gl_nameserver_read nameserver_ready_callback gl_nameserver_ready_callback nameserver_up gl_nameserver_up nameserver_write_waiting gl_nameserver_write_waiting need_numeric_port_hack_ gl_need_numeric_port_hack_ need_socktype_protocol_hack_ gl_need_socktype_protocol_hack_ notify_base_cbq_callback gl_notify_base_cbq_callback .o: gl_.o: o: gl_o: _original_cond_fns gl__original_cond_fns _original_lock_fns gl__original_lock_fns parse_numeric_servname gl_parse_numeric_servname poll_add gl_poll_add poll_dealloc gl_poll_dealloc poll_del gl_poll_del poll_dispatch gl_poll_dispatch poll_init gl_poll_init pollops gl_pollops prefix_suffix_match.constprop.30 gl_prefix_suffix_match.constprop.30 PRESERVE_PINNED gl_PRESERVE_PINNED redirection_phrases gl_redirection_phrases regname_ok gl_regname_ok reply_handle gl_reply_handle reply_run_callback gl_reply_run_callback reply_schedule_callback gl_reply_schedule_callback request_find_from_trans_id gl_request_find_from_trans_id request_finished gl_request_finished request_new.isra.16 gl_request_new.isra.16 request_submit 
gl_request_submit resolv_conf_parse_line gl_resolv_conf_parse_line response_classes gl_response_classes RESTORE_PINNED.part.9 gl_RESTORE_PINNED.part.9 rs gl_rs rs_initialized gl_rs_initialized scheme_ok gl_scheme_ok search_make_new.isra.10 gl_search_make_new.isra.10 search_postfix_add.isra.11 gl_search_postfix_add.isra.11 search_request_finished gl_search_request_finished search_request_new gl_search_request_new search_set_from_hostname gl_search_set_from_hostname search_state_decref gl_search_state_decref search_state_new gl_search_state_new select_add gl_select_add select_dealloc gl_select_dealloc select_del gl_select_del select_dispatch gl_select_dispatch select_free_selectop gl_select_free_selectop select_init gl_select_init selectops gl_selectops select_resize gl_select_resize server_error_phrases gl_server_error_phrases server_port_flush gl_server_port_flush server_port_free gl_server_port_free server_port_read gl_server_port_read server_port_ready_callback gl_server_port_ready_callback server_request_free_answers gl_server_request_free_answers server_request_free gl_server_request_free signals.5673 gl_signals.5673 sockaddr_getport gl_sockaddr_getport sockaddr_setport gl_sockaddr_setport strtoint gl_strtoint strtotimeval gl_strtotimeval success_phrases gl_success_phrases tested_for_getaddrinfo_hacks gl_tested_for_getaddrinfo_hacks test_for_getaddrinfo_hacks gl_test_for_getaddrinfo_hacks transaction_id_pick gl_transaction_id_pick upcast.part.0 gl_upcast.part.0 uri_chars gl_uri_chars use_mmap gl_use_mmap use_monotonic gl_use_monotonic use_monotonic_initialized.6057 gl_use_monotonic_initialized.6057 userinfo_ok gl_userinfo_ok use_sendfile gl_use_sendfile _warn_helper gl__warn_helper ZERO_CHAIN.part.8 gl_ZERO_CHAIN.part.8 ================================================ FILE: patches/libhdfs.patch ================================================ diff -rupN hadoop-1.0.1_old/src/c++/libhdfs/hdfsJniHelper.c hadoop-1.0.1/src/c++/libhdfs/hdfsJniHelper.c --- 
hadoop-1.0.1_old/src/c++/libhdfs/hdfsJniHelper.c 2012-04-03 11:46:00.000000000 -0400 +++ hadoop-1.0.1/src/c++/libhdfs/hdfsJniHelper.c 2012-04-03 11:54:18.000000000 -0400 @@ -15,7 +15,9 @@ */ #include <string.h> +#if !defined(__APPLE__) #include <error.h> +#endif /* __APPLE__ */ #include "hdfsJniHelper.h" static pthread_mutex_t hdfsHashMutex = PTHREAD_MUTEX_INITIALIZER; diff -rupN hadoop-1.0.1_old/src/c++/libhdfs/configure hadoop-1.0.1/src/c++/libhdfs/configure --- hadoop-1.0.1_old/src/c++/libhdfs/configure +++ hadoop-1.0.1/src/c++/libhdfs/configure @@ -12038,7 +12038,7 @@ echo "${ECHO_T}ohh u there ... $LIB_JVM_DIR" >&6; } if test "$supported_os" != "darwin" then CFLAGS="$CFLAGS -m${JVM_ARCH} -I$JAVA_HOME/include -I$JAVA_HOME/include/$supported_os" - LDFLAGS="$LDFLAGS -m${JVM_ARCH} -L$LIB_JVM_DIR -ljvm -shared -Wl,-x" + LDFLAGS="$LDFLAGS -m${JVM_ARCH} -L$LIB_JVM_DIR -ljvm -Wl,-x" { echo "$as_me:$LINENO: result: VALUE OF JVM_ARCH IS :$JVM_ARCH" >&5 echo "${ECHO_T}VALUE OF JVM_ARCH IS :$JVM_ARCH" >&6; } fi diff -rupN hadoop-1.0.1_old/src/c++/libhdfs/configure.ac hadoop-1.0.1/src/c++/libhdfs/configure.ac --- hadoop-1.0.1_old/src/c++/libhdfs/configure.ac +++ hadoop-1.0.1/src/c++/libhdfs/configure.ac @@ -80,7 +80,7 @@ AP_JVM_LIBDIR() if test "$supported_os" != "darwin" then CFLAGS="$CFLAGS -m${JVM_ARCH} -I$JAVA_HOME/include -I$JAVA_HOME/include/$supported_os" - LDFLAGS="$LDFLAGS -m${JVM_ARCH} -L$LIB_JVM_DIR -ljvm -shared -Wl,-x" + LDFLAGS="$LDFLAGS -m${JVM_ARCH} -L$LIB_JVM_DIR -ljvm -Wl,-x" AC_MSG_RESULT([VALUE OF JVM_ARCH IS :$JVM_ARCH]) fi diff -rupN hadoop-1.0.1_old/src/c++/libhdfs/hdfs.c hadoop-1.0.1/src/c++/libhdfs/hdfs.c --- hadoop-1.0.1_old/src/c++/libhdfs/hdfs.c +++ hadoop-1.0.1/src/c++/libhdfs/hdfs.c @@ -252,7 +252,7 @@ hdfsFS hdfsConnectAsUser(const char* host, tPort port, const char *user) cURI = malloc(strlen(host)+16); sprintf(cURI, "hdfs://%s:%d", host, (int)(port)); if (cURI == NULL) { - fprintf (stderr, "Couldn't allocate an object of size %d", + fprintf 
(stderr, "Couldn't allocate an object of size %zu", strlen(host) + 16); errno = EINTERNAL; goto done; ================================================ FILE: patches/libjson.patch ================================================ diff -rup libjson2/JSONOptions.h libjson/JSONOptions.h --- libjson2/JSONOptions.h 2012-03-10 01:41:30.000000000 -0800 +++ libjson/JSONOptions.h 2013-05-24 17:44:29.000000000 -0700 @@ -11,7 +11,7 @@ * JSON_LIBRARY must be declared if libjson is compiled as a static or dynamic * library. This exposes a C-style interface, but none of the inner workings of libjson */ -#define JSON_LIBRARY +// #define JSON_LIBRARY /* @@ -94,7 +94,7 @@ * JSON_BINARY is used to support binary, which is base64 encoded and decoded by libjson, * if this option is not turned off, no base64 support is included */ -#define JSON_BINARY +// #define JSON_BINARY /* @@ -102,7 +102,7 @@ * and decoding. This may be useful if you want to obfuscate your json, or send binary data over * a network */ -#define JSON_EXPOSE_BASE64 +// #define JSON_EXPOSE_BASE64 /* Only in libjson: JSONOptions.h.orig Only in libjson: Objects_static diff -rup libjson2/Source/JSONDefs.h libjson/Source/JSONDefs.h --- libjson2/Source/JSONDefs.h 2012-02-23 09:30:04.000000000 -0800 +++ libjson/Source/JSONDefs.h 2013-05-24 17:44:29.000000000 -0700 @@ -146,23 +146,6 @@ typedef void (*json_free_t)(void *); typedef void * (*json_realloc_t)(void *, unsigned long); #endif -#ifdef JSON_DEBUG - #ifdef NDEBUG - #ifdef __GNUC__ - #warning, Have JSON_DEBUG on in a release build - #else - #error, Have JSON_DEBUG on in a release build - #endif - #endif -#else - #ifndef NDEBUG - #ifdef __GNUC__ - #warning, Release build of libjson, but NDEBUG is not on - #else - #error, Release build of libjson, but NDEBUG is not on - #endif - #endif -#endif #ifdef JSON_UNIT_TEST #define JSON_PRIVATE public: diff -rup libjson2/Source/JSONSharedString.h libjson/Source/JSONSharedString.h --- libjson2/Source/JSONSharedString.h 2012-02-21 
04:30:32.000000000 -0800 +++ libjson/Source/JSONSharedString.h 2013-05-24 17:56:07.000000000 -0700 @@ -38,8 +38,9 @@ public: struct iterator; struct const_iterator { - const_iterator(const json_char * p, const json_shared_string * pa) : parent(pa), it(p){} - + //const_iterator(const json_char * p, const json_shared_string * pa) : parent(pa), it(p){} + const_iterator(const json_char * p, const json_shared_string * pa) : it(p){} + inline const_iterator& operator ++(void) json_nothrow { ++it; return *this; } inline const_iterator& operator --(void) json_nothrow { --it; return *this; } inline const_iterator& operator +=(long i) json_nothrow { it += i; return *this; } @@ -84,15 +85,15 @@ public: inline const_iterator & operator =(const const_iterator & orig) json_nothrow { it = orig.it; return *this; } const_iterator (const const_iterator & orig) json_nothrow : it(orig.it) {} private: - const json_shared_string * parent; + // const json_shared_string * parent; // creates annoying warning const json_char * it; friend class json_shared_string; friend struct iterator; }; - + struct iterator { iterator(const json_char * p, const json_shared_string * pa) : parent(pa), it(p){} - + inline iterator& operator ++(void) json_nothrow { ++it; return *this; } inline iterator& operator --(void) json_nothrow { --it; return *this; } inline iterator& operator +=(long i) json_nothrow { it += i; return *this; } @@ -146,22 +147,22 @@ public: inline json_shared_string::iterator begin(void){ - iterator res = iterator(data(), this); + iterator res = iterator(data(), this); return res; } inline json_shared_string::iterator end(void){ - iterator res = iterator(data() + len, this); + iterator res = iterator(data() + len, this); return res; } inline json_shared_string::const_iterator begin(void) const { - const_iterator res = const_iterator(data(), this); + const_iterator res = const_iterator(data(), this); return res; } inline json_shared_string::const_iterator end(void) const { - const_iterator 
res = const_iterator(data() + len, this); + const_iterator res = const_iterator(data() + len, this); return res; } - + inline json_string::iterator std_begin(void){ return _str -> mystring.begin() + offset; @@ -169,36 +170,36 @@ public: inline json_string::iterator std_end(void){ return std_begin() + len; } - + inline json_string::const_iterator std_begin(void) const{ return _str -> mystring.begin() + offset; } inline json_string::const_iterator std_end(void) const{ return std_begin() + len; } - + inline json_shared_string(void) : offset(0), len(0), _str(new(json_malloc<json_shared_string_internal>(1)) json_shared_string_internal(json_global(EMPTY_JSON_STRING))) {} - + inline json_shared_string(const json_string & str) : offset(0), len(str.length()), _str(new(json_malloc<json_shared_string_internal>(1)) json_shared_string_internal(str)) {} - + inline json_shared_string(const json_shared_string & str, size_t _offset, size_t _len) : _str(str._str), offset(str.offset + _offset), len(_len) { ++_str -> refCount; } - + inline json_shared_string(const json_shared_string & str, size_t _offset) : _str(str._str), offset(str.offset + _offset), len(str.len - _offset) { ++_str -> refCount; } - + inline json_shared_string(const iterator & s, const iterator & e) : _str(s.parent -> _str), offset(s.it - s.parent -> _str -> mystring.data()), len(e.it - s.it){ ++_str -> refCount; } - + inline ~json_shared_string(void){ deref(); } - + inline bool empty(void) const { return len == 0; } - + size_t find(json_char ch, size_t pos = 0) const { if (_str -> refCount == 1) return _str -> mystring.find(ch, pos); json_string::const_iterator e = std_end(); @@ -207,7 +208,7 @@ public: } return json_string::npos; } - + inline json_char & operator[] (size_t loc){ return _str -> mystring[loc + offset]; } @@ -218,21 +219,21 @@ public: inline size_t length() const { return len; } inline const json_char * c_str() const { return toString().c_str(); } inline const json_char * data() const { return _str -> 
mystring.data() + offset; } - + inline bool operator != (const json_shared_string & other) const { if ((other._str == _str) && (other.len == len) && (other.offset == offset)) return false; return other.toString() != toString(); } - + inline bool operator == (const json_shared_string & other) const { if ((other._str == _str) && (other.len == len) && (other.offset == offset)) return true; return other.toString() == toString(); } - + inline bool operator == (const json_string & other) const { return other == toString(); } - + json_string & toString(void) const { //gonna have to do a real substring now anyway, so do it completely if (_str -> refCount == 1){ @@ -246,8 +247,8 @@ public: offset = 0; return _str -> mystring; } - - + + inline void assign(const json_shared_string & other, size_t _offset, size_t _len){ if (other._str != _str){ deref(); @@ -257,11 +258,11 @@ public: offset = other.offset + _offset; len = _len; } - + json_shared_string(const json_shared_string & other) : _str(other._str), offset(other.offset), len(other.len){ ++_str -> refCount; } - + json_shared_string & operator =(const json_shared_string & other){ if (other._str != _str){ deref(); @@ -272,13 +273,13 @@ public: len = other.len; return *this; } - + json_shared_string & operator += (const json_char c){ toString() += c; ++len; return *this; } - + //when doing a plus equal of another string, see if it shares the string and starts where this one left off, in which case just increase len JSON_PRIVATE struct json_shared_string_internal { Only in libjson/Source: JSONSharedString.h.orig Only in libjson/Source: JSONSharedString.h.rej Only in libjson: libjson.a diff -rup libjson2/makefile libjson/makefile --- libjson2/makefile 2012-02-23 14:34:36.000000000 -0800 +++ libjson/makefile 2013-05-24 17:44:29.000000000 -0700 @@ -236,7 +236,7 @@ $(lib_target): banner installdirs $(addp endif # Compile object files -$(objdir)/%.o: $(srcdir)/%.cpp +$(objdir)/%.o: $(srcdir)/%.cpp installdirs $(CXX) $< -o $@ 
$(CXXFLAGS) ifeq ($(SHARED),1) ================================================ FILE: patches/opencv_apple_rpath.patch ================================================ 564a565 > if(APPLE) 571c572 < INSTALL_NAME_DIR lib --- > INSTALL_NAME_DIR "@rpath" 573c574,581 < --- > else() > set_target_properties(${the_module} PROPERTIES > OUTPUT_NAME "${the_module}${OPENCV_DLLVERSION}" > DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}" > ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_PATH} > RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH} > INSTALL_NAME_DIR lib) > endif() ================================================ FILE: patches/tcmalloc.patch ================================================ Index: src/base/linux_syscall_support.h =================================================================== --- src/base/linux_syscall_support.h (revision 175) +++ src/base/linux_syscall_support.h (working copy) @@ -243,14 +243,13 @@ long ru_nivcsw; }; -struct siginfo; #if defined(__i386__) || defined(__arm__) || defined(__PPC__) /* include/asm-{arm,i386,mips,ppc}/signal.h */ struct kernel_old_sigaction { union { void (*sa_handler_)(int); - void (*sa_sigaction_)(int, struct siginfo *, void *); + void (*sa_sigaction_)(int, siginfo_t *, void *); }; unsigned long sa_mask; unsigned long sa_flags; @@ -287,13 +286,13 @@ unsigned long sa_flags; union { void (*sa_handler_)(int); - void (*sa_sigaction_)(int, struct siginfo *, void *); + void (*sa_sigaction_)(int, siginfo_t *, void *); }; struct kernel_sigset_t sa_mask; #else union { void (*sa_handler_)(int); - void (*sa_sigaction_)(int, struct siginfo *, void *); + void (*sa_sigaction_)(int, siginfo_t *, void *); }; unsigned long sa_flags; void (*sa_restorer)(void); ================================================ FILE: patches/zookeeper/Makefile ================================================ all: cd src/c; make install: cd src/c; make install ================================================ FILE: patches/zookeeper/configure 
================================================ #!/bin/bash ant compile_jute cd src/c ./configure $@ ================================================ FILE: scripts/add_line_to_eof.sh ================================================ #!/bin/bash for f in `find src \( -name "*.cpp" -or -name "*.hpp" \)`; do lastline=`tail -n 1 $f` len=$((${#lastline})) if [ $len -ne 0 ]; then echo $f echo -e "" >> $f fi done ================================================ FILE: scripts/binary_list.txt ================================================ toolkits/clustering/kmeans toolkits/graph_analytics/undirected_triangle_count toolkits/graph_analytics/directed_triangle_count toolkits/graph_analytics/pagerank toolkits/graph_analytics/kcore toolkits/graph_analytics/format_convert toolkits/graph_analytics/sssp toolkits/graph_analytics/simple_coloring toolkits/collaborative_filtering/als toolkits/collaborative_filtering/sparse_als toolkits/collaborative_filtering/wals toolkits/collaborative_filtering/sgd toolkits/collaborative_filtering/biassgd toolkits/collaborative_filtering/svdpp toolkits/collaborative_filtering/svd toolkits/collaborative_filtering/nmf toolkits/collaborative_filtering/make_synthetic_als_data toolkits/graphical_models/loopybp_denoise toolkits/graphical_models/structured_prediction toolkits/graphical_models/synthetic_image_data toolkits/topic_modeling/lda_sequential_cgs toolkits/topic_modeling/cgs_lda toolkits/topic_modeling/cgs_lda_mimno_experimental ================================================ FILE: scripts/build_linux_static.sh ================================================ if [ ! -d src ]; then echo "Run from the graphlab root folder" exit fi ./configure -D MARCH=x86-64 -D MTUNE=generic -D COMPILER_FLAGS="-static-libgcc\ -static-libstdc++" scripts/compile_static_release.sh # is this a openmpi or a mpich2 build?
ISOPENMPI=0 if grep -q mpi_cxx config.log then rootdirname="graphlab_openmpi" unstrippeddirname="graphlab_openmpi_unstripped" ISOPENMPI=1 elif grep -q mpich config.log then rootdirname="graphlab_mpich2" unstrippeddirname="graphlab_mpich2_unstripped" else echo "Unable to detect MPI type" exit fi # now package a binary release rm -rf ./$rootdirname rm -rf ./$unstrippeddirname mkdir $rootdirname mkdir $unstrippeddirname mkdir $rootdirname/gldeps mkdir $unstrippeddirname/gldeps for file in `cat scripts/binary_list.txt` do dname=`dirname $file` fname=`basename $file` deps=$(ldd release/$file | awk 'BEGIN{ORS=" "}$1 \ ~/^\//{print $1}$3~/^\//{print $3}' \ | sed 's/,$/\n/') for dep in $deps do depname=`basename $dep` # definitely exclude jvm if [[ $depname == "libjvm.so" ]]; then continue fi if [ ! -f "$rootdirname/gldeps/$depname" ]; then echo "Copying $dep" cp "$dep" "$rootdirname/gldeps/" cp "$dep" "$unstrippeddirname/gldeps/" fi done mkdir -p $rootdirname/$dname cp release/$file $rootdirname/$dname/ #strip it strip $rootdirname/$dname/$fname #package the script cp scripts/linux_run_script_template.sh $rootdirname/$dname/$fname.sh #repeat for unstripped mkdir -p $unstrippeddirname/$dname cp release/$file $unstrippeddirname/$dname/ #package the script cp scripts/linux_run_script_template.sh $unstrippeddirname/$dname/$fname.sh done #package all the rest of the stuff #copy the license mkdir $rootdirname/license cp license/LICENSE.txt $rootdirname/license/ mkdir $unstrippeddirname/license cp license/LICENSE.txt $unstrippeddirname/license/ #copy the README cp BINARY_README $rootdirname/README cp BINARY_README $unstrippeddirname/README # I am unable to get openmpi to work properly with the ld hack # since it appears to have complicated binary dependencies. # (it forks and launches some other daemon which has its own dependencies) # I will give up on this for now and try to get ABI compatibility. 
# it seems like 1.3 is compatible with 1.4 and 1.5 is compatible with 1.6 if [ $ISOPENMPI -eq 1 ]; then rm $rootdirname/gldeps/libmpi.* $rootdirname/gldeps/libopen-* rm $unstrippeddirname/gldeps/libmpi.* $unstrippeddirname/gldeps/libopen-* fi #pack tar -cjvf $rootdirname.tar.bz2 $rootdirname tar -cjvf $unstrippeddirname.tar.bz2 $unstrippeddirname ================================================ FILE: scripts/build_linux_static_no_jvm.sh ================================================ if [ ! -d src ]; then echo "Run from the graphlab root folder" exit fi ./configure -D MARCH=x86-64 -D MTUNE=generic --no_jvm -D COMPILER_FLAGS="-static-libgcc\ -static-libstdc++" scripts/compile_static_release.sh # is this a openmpi or a mpich2 build? ISOPENMPI=0 if grep -q mpi_cxx config.log then rootdirname="graphlab_openmpi_no_jvm" unstrippeddirname="graphlab_openmpi_unstripped_no_jvm" ISOPENMPI=1 elif grep -q mpich config.log then rootdirname="graphlab_mpich2_no_jvm" unstrippeddirname="graphlab_mpich2_unstripped_no_jvm" else echo "Unable to detect MPI type" exit fi # now package a binary release rm -rf ./$rootdirname rm -rf ./$unstrippeddirname mkdir $rootdirname mkdir $unstrippeddirname mkdir $rootdirname/gldeps mkdir $unstrippeddirname/gldeps for file in `cat scripts/binary_list.txt` do dname=`dirname $file` fname=`basename $file` deps=$(ldd release/$file | awk 'BEGIN{ORS=" "}$1 \ ~/^\//{print $1}$3~/^\//{print $3}' \ | sed 's/,$/\n/') for dep in $deps do depname=`basename $dep` # definitely exclude jvm if [[ $depname == "libjvm.so" ]]; then continue fi if [ !
-f "$rootdirname/gldeps/$depname" ]; then echo "Copying $dep" cp "$dep" "$rootdirname/gldeps/" cp "$dep" "$unstrippeddirname/gldeps/" fi done mkdir -p $rootdirname/$dname cp release/$file $rootdirname/$dname/ #strip it strip $rootdirname/$dname/$fname #package the script cp scripts/linux_run_script_template.sh $rootdirname/$dname/$fname.sh #repeat for unstripped mkdir -p $unstrippeddirname/$dname cp release/$file $unstrippeddirname/$dname/ #package the script cp scripts/linux_run_script_template.sh $unstrippeddirname/$dname/$fname.sh done #package all the rest of the stuff #copy the license mkdir $rootdirname/license cp license/LICENSE.txt $rootdirname/license/ mkdir $unstrippeddirname/license cp license/LICENSE.txt $unstrippeddirname/license/ #copy the README cp BINARY_README $rootdirname/README cp BINARY_README $unstrippeddirname/README # I am unable to get openmpi to work properly with the ld hack # since it appears to have complicated binary dependencies. # (it forks and launches some other daemon which has its own dependencies) # I will give up on this for now and try to get ABI compatibility. # it seems like 1.3 is compatible with 1.4 and 1.5 is compatible with 1.6 if [ $ISOPENMPI -eq 1 ]; then rm $rootdirname/gldeps/libmpi.* $rootdirname/gldeps/libopen-* rm $unstrippeddirname/gldeps/libmpi.* $unstrippeddirname/gldeps/libopen-* fi #pack tar -cjvf $rootdirname.tar.bz2 $rootdirname tar -cjvf $unstrippeddirname.tar.bz2 $unstrippeddirname ================================================ FILE: scripts/build_linux_static_no_jvm_no_mpi.sh ================================================ if [ ! -d src ]; then echo "Run from the graphlab root folder" exit fi ./configure -D MARCH=x86-64 -D MTUNE=generic --no_jvm -D NO_MPI:BOOL=true -D COMPILER_FLAGS:STRING="-static-libgcc\ -static-libstdc++" scripts/compile_static_release.sh $@ # is this a openmpi or a mpich2 build?
rootdirname="graphlab_no_jvm_no_mpi" unstrippeddirname="graphlab_unstripped_no_jvm_no_mpi" ISOPENMPI=0 # now package a binary release rm -rf ./$rootdirname rm -rf ./$unstrippeddirname mkdir $rootdirname mkdir $unstrippeddirname mkdir $rootdirname/gldeps mkdir $unstrippeddirname/gldeps tmp=$@ if test $# -lt 1 ; then tmp=`cat scripts/binary_list.txt` fi for file in $tmp do dname=`dirname $file` fname=`basename $file` deps=$(ldd release/$file | awk 'BEGIN{ORS=" "}$1 \ ~/^\//{print $1}$3~/^\//{print $3}' \ | sed 's/,$/\n/') for dep in $deps do depname=`basename $dep` # definitely exclude jvm if [ ! -f "$rootdirname/gldeps/$depname" ]; then echo "Copying $dep" cp "$dep" "$rootdirname/gldeps/" cp "$dep" "$unstrippeddirname/gldeps/" fi done mkdir -p $rootdirname/$dname cp release/$file $rootdirname/$dname/ #strip it strip $rootdirname/$dname/$fname #package the script cp scripts/linux_run_script_template.sh $rootdirname/$dname/$fname.sh #repeat for unstripped mkdir -p $unstrippeddirname/$dname cp release/$file $unstrippeddirname/$dname/ #package the script cp scripts/linux_run_script_template.sh $unstrippeddirname/$dname/$fname.sh done #package all the rest of the stuff #copy the license mkdir $rootdirname/license cp license/LICENSE.txt $rootdirname/license/ mkdir $unstrippeddirname/license cp license/LICENSE.txt $unstrippeddirname/license/ #copy the README cp BINARY_README $rootdirname/README cp BINARY_README $unstrippeddirname/README #pack tar -cjvf $rootdirname.tar.bz2 $rootdirname tar -cjvf $unstrippeddirname.tar.bz2 $unstrippeddirname ================================================ FILE: scripts/build_osx_static.sh ================================================ if [ ! -d src ]; then echo "Run from the graphlab root folder" exit fi ./configure --no_jvm -D NO_MPI:BOOL=true -D COMPILER_FLAGS="-mmacosx-version-min=10.7" -D MARCH=x86-64 -D MTUNE=generic -D HAS_CRC32:BOOL=false scripts/compile_static_release.sh echo "Packaging binary release..." 
# now package a binary release # for whatever reason the mac binaries are quite small... # stripping not necessary rootdirname="graphlab_mac" rm -rf ./$rootdirname mkdir $rootdirname for file in `cat scripts/binary_list.txt` do dname=`dirname $file` mkdir -p $rootdirname/$dname cp release/$file $rootdirname/$dname/ done #package all the rest of the stuff #copy the license mkdir $rootdirname/license cp license/LICENSE.txt $rootdirname/license/ #copy the README cp BINARY_README $rootdirname/README echo "Binary release packaged in $rootdirname" tar -cjvf $rootdirname.tar.bz2 $rootdirname ================================================ FILE: scripts/compile_static_release.sh ================================================ #!/bin/bash if [ ! -d release ]; then echo "Run from the graphlab root folder after ./configure" else cd release make external_dependencies cd .. rm -f deps/local/lib/libboost*.so deps/local/lib/libhdfs*.so deps/local/lib/libtcmalloc*.so deps/local/lib/libevent*.so deps/local/lib/libproto*.so rm -f deps/local/lib/libboost*.dylib deps/local/lib/libhdfs*.dylib deps/local/lib/libtcmalloc*.dylib deps/local/lib/libevent*.dylib cd release tmp=$@ if test $# -lt 1 ; then tmp=`cat ../scripts/binary_list.txt` fi echo $tmp for file in $tmp do pushd . dname=`dirname $file` fname=`basename $file` cd $dname make -j4 $fname popd done fi ================================================ FILE: scripts/ec2/benchmark_ec2.sh ================================================ #!/bin/bash # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. 
You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # This script is an example benchmarking of GraphLab for EC2 # for testing scaling # (C) GraphLab Inc. 2013 # Please send any questions or bug reports to graphlabapi@groups.google.com # Written by Danny Bickson ############################################################################ # CONFIGURATION ############################################################################ MAX_SLAVES=3 # configure the maximum number of slaves MAX_RETRY=3 # configure the number of experiemnt repeats PAGERANK=1 # if 1, runs pagerank SVD=1 # if 1, runs svd ALS=1 # if 1, runs als #It is recommended to define the below two variables for easier setup #uncomment the below two lines once you set them up #export AWS_ACCESS_KEY_ID=[ Your access key ] #export AWS_SECRET_ACCESS_KEY=[ Your access key secret ] ###################################################################### # clean old running instances, if any echo "y" | ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 destroy hpctest # launch ec2 cc2.8xlarge image ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 -a hpc -s $MAX_SLAVES -t cc2.8xlarge launch hpctest # update the GraphLab version to be the latest, recompile, and update slaves ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 update hpctest # run pagerank benchmarks if [ $PAGERANK -eq 1 ]; then for i in `seq 0 1 $MAX_SLAVES` do echo "Running Pagerank" for j in `seq 0 1 $MAX_RETRY` do ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 -s $i pagerank_demo hpctest done done fi # run SVD benchmarks if [ $SVD -eq 1 ]; then for i in `seq 0 1 $MAX_SLAVES` do echo "Running SVD" for j in `seq 0 1 
$MAX_RETRY` do ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 -s $i svd_demo hpctest done done fi # run ALS benchmarks if [ $ALS -eq 1 ]; then for i in `seq 0 1 $MAX_SLAVES` do echo "Running ALS" for j in `seq 0 1 $MAX_RETRY` do ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 -s $i als_demo hpctest done done fi # clean everything echo "y" | ./gl-ec2 -i ~/.ssh/amazonec2.pem -k amazonec2 destroy hpctest ================================================ FILE: scripts/ec2/gl-ec2 ================================================ #!/bin/sh # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. cd "`dirname $0`" PYTHONPATH="./third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" python ./gl_ec2.py $@ ================================================ FILE: scripts/ec2/gl_ec2.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License.
You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import boto import logging import os import random import shutil import subprocess import sys import tempfile import time import urllib2 import stat from optparse import OptionParser from sys import stderr from boto.ec2.blockdevicemapping import BlockDeviceMapping, EBSBlockDeviceType # A static URL from which to figure out the latest GraphLab EC2 AMI STD_AMI_URL = "https://s3.amazonaws.com/GraphLabGit/graphlab2-std" HVM_AMI_URL = "https://s3.amazonaws.com/GraphLabGit/graphlab2-hvm" compilation_threads = 4 # Configure and parse our command-line arguments def parse_args(): parser = OptionParser(usage="gl-ec2 [options] <action> <cluster_name>" + "\n\n<action> can be: launch, destroy, login, stop, start, start-hadoop, stop-hadoop, check-hadoop, get-master, attach-ebs, detach-ebs, als_demo, svd_demo, pagerank_demo, update, update-dbg", add_help_option=False) parser.add_option("-h", "--help", action="help", help="Show this help message and exit") parser.add_option("-s", "--slaves", type="int", default=1, help="Number of slaves to launch (default: 1)") parser.add_option("-w", "--wait", type="int", default=120, help="Seconds to wait for nodes to start (default: 120)") parser.add_option("-k", "--key-pair", help="The name of the ssh identitiy key") parser.add_option("-i", "--identity-file", help="SSH private key file to use for logging into instances") parser.add_option("-t", "--instance-type", default="m1.xlarge", help="Type of instance to launch (default: m1.xlarge). 
" + "WARNING: must be 64-bit; small instances won't work") parser.add_option("-m", "--master-instance-type", default="", help="Master instance type (leave empty for same as instance-type)") parser.add_option("-r", "--region", default="us-west-2", help="EC2 region zone to launch instances in") parser.add_option("-z", "--zone", default="", help="Availability zone to launch instances in") parser.add_option("-a", "--ami", default="std", help="Amazon Machine Image ID to use, or 'hpc' to use ami for high performance instances" + "(default: std)") parser.add_option("-D", metavar="[ADDRESS:]PORT", dest="proxy_port", help="Use SSH dynamic port forwarding to create a SOCKS proxy at " + "the given local address (for use with login)") parser.add_option("--resume", action="store_true", default=False, help="Resume installation on a previously launched cluster " + "(for debugging)") parser.add_option("--ebs-vol-size", metavar="SIZE", type="int", default=0, help="Attach a new EBS volume of size SIZE (in GB) to each node as " + "/vol. The volumes will be deleted when the instances terminate. " + "Only possible on EBS-backed AMIs.") parser.add_option("--ebs-vol-id", default="", help="Attach an existing EBS volume to the master node at /dev/sdh." + "This is a required argument for action = attach-ebs or detach-ebs." 
+ "Only possible on EBS-backed AMIs.") parser.add_option("--swap", metavar="SWAP", type="int", default=1024, help="Swap space to set up per node, in MB (default: 1024)") parser.add_option("--spot-price", metavar="PRICE", type="float", help="If specified, launch slaves as spot instances with the given " + "maximum price (in dollars)") (opts, args) = parser.parse_args() if len(args) != 2: parser.print_help() sys.exit(1) (action, cluster_name) = args if opts.identity_file == None and action in ['launch', 'login', 'start-hadoop', 'stop-hadoop', 'check-hadoop', 'als_demo', 'svd_demo','pagerank_demo', 'update', 'update-dbg']: print >> stderr, ("ERROR: The -i or --identity-file argument is " + "required for " + action) sys.exit(1) private_key_mode = str(oct(os.stat(opts.identity_file)[stat.ST_MODE])[-3:]) if private_key_mode != "400" : print >> stderr, ("ERROR: permissions of private key file " +opts.identity_file+ " should be 400") sys.exit(1) if os.getenv('AWS_ACCESS_KEY_ID') == None: print >> stderr, ("ERROR: The environment variable AWS_ACCESS_KEY_ID " + "must be set") sys.exit(1) if os.getenv('AWS_SECRET_ACCESS_KEY') == None: print >> stderr, ("ERROR: The environment variable AWS_SECRET_ACCESS_KEY " + "must be set") sys.exit(1) if opts.instance_type == "m1.xlarge": compilation_threads = 4 return (opts, action, cluster_name) # Get the EC2 security group of the given name, creating it if it doesn't exist def get_or_make_group(conn, name): groups = conn.get_all_security_groups() group = [g for g in groups if g.name == name] if len(group) > 0: return group[0] else: print "Creating security group " + name return conn.create_security_group(name, "GraphLab EC2 group") # Wait for a set of launched instances to exit the "pending" state # (i.e. 
either to start running or to fail and be terminated) def wait_for_instances(conn, instances): while True: for i in instances: i.update() if len([i for i in instances if i.state == 'pending']) > 0: time.sleep(5) else: return # Check whether a given EC2 instance object is in a state we consider active, # i.e. not terminating or terminated. We count both stopping and stopped as # active since we can restart stopped clusters. def is_active(instance): return (instance.state in ['pending', 'running', 'stopping', 'stopped']) # Launch a cluster of the given name, by setting up its security groups, # and then starting new instances in them. # Returns a tuple of EC2 reservation objects for the master, slave # and zookeeper instances (in that order). # Fails if there already instances running in the cluster's groups. def launch_cluster(conn, opts, cluster_name): print "Setting up security groups..." master_group = get_or_make_group(conn, cluster_name + "-master") slave_group = get_or_make_group(conn, cluster_name + "-slaves") zoo_group = get_or_make_group(conn, cluster_name + "-zoo") # master_group = get_or_make_group(conn, cluster_name) # slave_group = get_or_make_group(conn, cluster_name) # zoo_group = get_or_make_group(conn, cluster_name) if master_group.rules == []: # Group was just now created master_group.authorize(src_group=master_group) master_group.authorize(src_group=slave_group) master_group.authorize(src_group=zoo_group) master_group.authorize('tcp', 22, 22, '0.0.0.0/0') master_group.authorize('tcp', 0, 65535, '0.0.0.0/0') master_group.authorize('udp', 0, 65535, '0.0.0.0/0') master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0') master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0') master_group.authorize('tcp', 50070, 50070, '0.0.0.0/0') master_group.authorize('tcp', 60070, 60070, '0.0.0.0/0') master_group.authorize('tcp', 38090, 38090, '0.0.0.0/0') if slave_group.rules == []: # Group was just now created slave_group.authorize(src_group=master_group) 
slave_group.authorize(src_group=slave_group) slave_group.authorize(src_group=zoo_group) slave_group.authorize('tcp', 0, 65535, '0.0.0.0/0') slave_group.authorize('udp', 0, 65535, '0.0.0.0/0') slave_group.authorize('tcp', 22, 22, '0.0.0.0/0') slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0') slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0') slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0') slave_group.authorize('tcp', 60060, 60060, '0.0.0.0/0') slave_group.authorize('tcp', 60075, 60075, '0.0.0.0/0') if zoo_group.rules == []: # Group was just now created zoo_group.authorize(src_group=master_group) zoo_group.authorize(src_group=slave_group) zoo_group.authorize(src_group=zoo_group) zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0') zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0') zoo_group.authorize('tcp', 2888, 2888, '0.0.0.0/0') zoo_group.authorize('tcp', 3888, 3888, '0.0.0.0/0') # Check if instances are already running in our groups print "Checking for running cluster..." reservations = conn.get_all_instances() for res in reservations: group_names = [g.id for g in res.groups] if master_group.name in group_names or slave_group.name in group_names or zoo_group.name in group_names: active = [i for i in res.instances if is_active(i)] if len(active) > 0: print >> stderr, ("ERROR: There are already instances running in " + "group %s, %s or %s" % (master_group.name, slave_group.name, zoo_group.name)) sys.exit(1) if opts.ami == "std": try: opts.ami = urllib2.urlopen(STD_AMI_URL).read().strip() print "GraphLab AMI for Standard Instances: " + opts.ami except: print >> stderr, "Could not read " + STD_AMI_URL elif opts.ami == "hpc" : try: opts.ami = urllib2.urlopen(HVM_AMI_URL).read().strip() print "GraphLab AMI for HPC Instances: " + opts.ami compilation_threads = 8 except: print >> stderr, "Could not read " + HVM_AMI_URL print "Launching instances..." 
try: image = conn.get_all_images(image_ids=[opts.ami])[0] except: print >> stderr, "Could not find AMI " + opts.ami sys.exit(1) # Create block device mapping so that we can add an EBS volume if asked to block_map = BlockDeviceMapping() if opts.ebs_vol_size > 0: device = EBSBlockDeviceType() device.size = opts.ebs_vol_size device.delete_on_termination = True block_map["/dev/sdv"] = device # Launch slaves if opts.spot_price != None: # Launch spot instances with the requested price print ("Requesting %d slaves as spot instances with price $%.3f" % (opts.slaves, opts.spot_price)) slave_reqs = conn.request_spot_instances( price = opts.spot_price, image_id = opts.ami, launch_group = "launch-group-%s" % cluster_name, placement = opts.zone, count = opts.slaves, key_name = opts.key_pair, security_groups = [slave_group], instance_type = opts.instance_type, block_device_map = block_map) my_req_ids = [req.id for req in slave_reqs] print "Waiting for spot instances to be granted..." while True: time.sleep(10) reqs = conn.get_all_spot_instance_requests() id_to_req = {} for r in reqs: id_to_req[r.id] = r active = 0 instance_ids = [] for i in my_req_ids: if id_to_req[i].state == "active": active += 1 instance_ids.append(id_to_req[i].instance_id) if active == opts.slaves: print "All %d slaves granted" % opts.slaves reservations = conn.get_all_instances(instance_ids) slave_nodes = [] for r in reservations: slave_nodes += r.instances break else: print "%d of %d slaves granted, waiting longer" % (active, opts.slaves) else: # Launch non-spot instances slave_res = image.run(key_name = opts.key_pair, security_groups = [slave_group], instance_type = opts.instance_type, placement = opts.zone, min_count = opts.slaves, max_count = opts.slaves, block_device_map = block_map) slave_nodes = slave_res.instances print "Launched slaves, regid = " + slave_res.id # # Launch masters master_type = opts.master_instance_type if master_type == "": master_type = opts.instance_type master_res = 
image.run(key_name = opts.key_pair, security_groups = [master_group], instance_type = master_type, placement = opts.zone, min_count = 1, max_count = 1, block_device_map = block_map) master_nodes = master_res.instances print "Launched master, regid = " + master_res.id zoo_nodes = [] # Return all the instances return (master_nodes, slave_nodes, zoo_nodes) # Get the EC2 instances in an existing cluster if available. # Returns a tuple of lists of EC2 instance objects for the masters, # slaves and zookeeper nodes (in that order). def get_existing_cluster(conn, opts, cluster_name): print "Searching for existing cluster " + cluster_name + "..." reservations = conn.get_all_instances() master_nodes = [] slave_nodes = [] zoo_nodes = [] for res in reservations: active = [i for i in res.instances if is_active(i)] if len(active) > 0: print "Acitve: ", active group_names = list(set(g.name for g in i.groups for i in res.instances)) #DB: bug fix as explained here: https://spark-project.atlassian.net/browse/SPARK-749 print "Group names: ", group_names if group_names == [cluster_name + "-master"]: master_nodes += res.instances elif group_names == [cluster_name + "-slaves"]: slave_nodes += res.instances elif group_names == [cluster_name + "-zoo"]: zoo_nodes += res.instances if master_nodes != [] and slave_nodes != []: print ("Found %d master(s), %d slaves, %d ZooKeeper nodes" % (len(master_nodes), len(slave_nodes), len(zoo_nodes))) return (master_nodes, slave_nodes, zoo_nodes) else: if master_nodes == [] and slave_nodes != []: print "ERROR: Could not find master in group " + cluster_name + "-master" elif master_nodes != [] and slave_nodes == []: print "ERROR: Could not find slaves in group " + cluster_name + "-slaves" else: print "ERROR: Could not find any existing cluster" sys.exit(1) def get_internal_ips(conn, opts, cluster_name): (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(conn, opts, cluster_name) hosts = [master_nodes[0].private_dns_name] for slave in 
slave_nodes: hosts.append(slave.private_dns_name) return hosts def attach_ebs(conn, opts, cluster_name): if opts.ebs_vol_id=="": print "ERROR: Please specify --ebs-vol-id" return False (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(conn, opts, cluster_name) masterid = master_nodes[0].id return conn.attach_volume(opts.ebs_vol_id, masterid, "/dev/sdh") def detach_ebs(conn, opts, cluster_name): if opts.ebs_vol_id=="": print "ERROR: Please specify --ebs-vol-id" return False else: return conn.detach_volume(opts.ebs_vol_id) # Deploy configuration files and run setup scripts on a newly launched # or started EC2 cluster. def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, cluster_name, deploy_ssh_key): # print "Deploying files to master..." # deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, zoo_nodes) master = master_nodes[0].public_dns_name if deploy_ssh_key: print "Copying SSH key %s to master node %s..." % (opts.identity_file,master) ssh(master, opts, 'sudo mkdir -p /root/.ssh; mkdir tmp') scp(master, opts, opts.identity_file, 'tmp/id_rsa') ssh(master, opts, 'sudo mv tmp/id_rsa ~/.ssh/') config = open("config", "w") config.write("StrictHostKeyChecking no\nBatchMode yes\n") config.close() scp(master, opts, "config", ".ssh/config") for i in slave_nodes: ip = i.public_dns_name print "Copying SSH key %s to slave node %s..." % (opts.identity_file,ip) ssh(ip, opts, 'sudo mkdir -p /root/.ssh; mkdir tmp') scp(ip, opts, opts.identity_file, 'tmp/id_rsa') ssh(ip, opts, 'sudo mv tmp/id_rsa ~/.ssh/') scp(ip, opts, "config", ".ssh/config") print "Copy machines hostfile to master..." hosts = get_internal_ips(conn, opts, cluster_name) hostfile = open("machines", "w") for ip in hosts: hostfile.write("%s\n" % ip) hostfile.close() scp(master, opts, "machines", '~/machines') print "Running setup on master..." 
# ssh(master, opts, "chmod u+x mesos-ec2/setup") # ssh(master, opts, "mesos-ec2/setup %s %s %s %s" % # ("generic", "none", "master", opts.swap)) print "Done!" print "The master ip is : " + master # Wait for a whole cluster (masters, slaves and ZooKeeper) to start up def wait_for_cluster(conn, wait_secs, master_nodes, slave_nodes, zoo_nodes): print "Waiting for instances to start up..." time.sleep(5) wait_for_instances(conn, master_nodes) wait_for_instances(conn, slave_nodes) if zoo_nodes != []: wait_for_instances(conn, zoo_nodes) print "Waiting %d more seconds..." % wait_secs time.sleep(wait_secs) # Get number of local disks available for a given EC2 instance type. def get_num_disks(instance_type): # From http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/index.html?InstanceStorage.html disks_by_instance = { "m1.small": 1, "m1.large": 2, "m1.xlarge": 4, "t1.micro": 1, "c1.medium": 1, "c1.xlarge": 4, "m2.xlarge": 1, "m2.2xlarge": 1, "m2.4xlarge": 2, "cc1.4xlarge": 2, "cc2.8xlarge": 4, "cg1.4xlarge": 2 } if instance_type in disks_by_instance: return disks_by_instance[instance_type] else: print >> stderr, ("WARNING: Don't know number of disks on instance type %s; assuming 1" % instance_type) return 1 # Deploy the configuration file templates in a given local directory to # a cluster, filling in any template parameters with information about the # cluster (e.g. lists of masters and slaves). Files are only deployed to # the first master instance in the cluster, and we expect the setup # script to be run on that instance to copy them to other nodes. 
def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, zoo_nodes): active_master = master_nodes[0].public_dns_name num_disks = get_num_disks(opts.instance_type) hdfs_data_dirs = "/mnt/ephemeral-hdfs/data" mapred_local_dirs = "/mnt/hadoop/mrlocal" if num_disks > 1: for i in range(2, num_disks + 1): hdfs_data_dirs += ",/mnt%d/ephemeral-hdfs/data" % i mapred_local_dirs += ",/mnt%d/hadoop/mrlocal" % i if zoo_nodes != []: zoo_list = '\n'.join([i.public_dns_name for i in zoo_nodes]) cluster_url = "zoo://" + ",".join( ["%s:2181/mesos" % i.public_dns_name for i in zoo_nodes]) else: zoo_list = "NONE" cluster_url = "%s:5050" % active_master template_vars = { "master_list": '\n'.join([i.public_dns_name for i in master_nodes]), "active_master": active_master, "slave_list": '\n'.join([i.public_dns_name for i in slave_nodes]), "zoo_list": zoo_list, "cluster_url": cluster_url, "hdfs_data_dirs": hdfs_data_dirs, "mapred_local_dirs": mapred_local_dirs } # Create a temp directory in which we will place all the files to be # deployed after we substitue template parameters in them tmp_dir = tempfile.mkdtemp() for path, dirs, files in os.walk(root_dir): if path.find(".svn") == -1: dest_dir = os.path.join('/', path[len(root_dir):]) local_dir = tmp_dir + dest_dir if not os.path.exists(local_dir): os.makedirs(local_dir) for filename in files: if filename[0] not in '#.~' and filename[-1] != '~': dest_file = os.path.join(dest_dir, filename) local_file = tmp_dir + dest_file with open(os.path.join(path, filename)) as src: with open(local_file, "w") as dest: text = src.read() for key in template_vars: text = text.replace("{{" + key + "}}", template_vars[key]) dest.write(text) dest.close() # rsync the whole directory over to the master machine command = (("rsync -rv -e 'ssh -o StrictHostKeyChecking=no -i %s' " + "'%s/' 'ubuntu@%s:/'") % (opts.identity_file, tmp_dir, active_master)) subprocess.check_call(command, shell=True) # Remove the temp directory we created above 
shutil.rmtree(tmp_dir) # Copy a file to a given host through scp, throwing an exception if scp fails def scp(host, opts, local_file, dest_file): subprocess.check_call( "scp -q -o StrictHostKeyChecking=no -i %s '%s' 'ubuntu@%s:%s'" % (opts.identity_file, local_file, host, dest_file), shell=True) # Run a command on a host through ssh, throwing an exception if ssh fails def ssh(host, opts, command): subprocess.check_call( "ssh -t -o StrictHostKeyChecking=no -i %s ubuntu@%s '%s'" % (opts.identity_file, host, command), shell=True) def main(): (opts, action, cluster_name) = parse_args() conn = boto.ec2.connect_to_region(opts.region) # Select an AZ at random if it was not specified. if opts.zone == "": opts.zone = random.choice(conn.get_all_zones()).name if action == "launch": if opts.resume: (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) else: (master_nodes, slave_nodes, zoo_nodes) = launch_cluster( conn, opts, cluster_name) wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes, zoo_nodes) setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, cluster_name, True) elif action == "destroy": response = raw_input("Are you sure you want to destroy the cluster " + cluster_name + "?\nALL DATA ON ALL NODES WILL BE LOST!!\n" + "Destroy cluster " + cluster_name + " (y/N): ") if response == "y": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) print "Terminating master..." for inst in master_nodes: inst.terminate() print "Terminating slaves..." for inst in slave_nodes: inst.terminate() if zoo_nodes != []: print "Terminating zoo..." for inst in zoo_nodes: inst.terminate() elif action == "login": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) master = master_nodes[0].public_dns_name print "Logging into master " + master + "..." 
proxy_opt = "" if opts.proxy_port != None: proxy_opt = "-D " + opts.proxy_port subprocess.check_call("ssh -o StrictHostKeyChecking=no -i %s %s ubuntu@%s" % (opts.identity_file, proxy_opt, master), shell=True) elif action == "start-hadoop": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) master = master_nodes[0].public_dns_name print "Staring hadoop on master " + master + "..." proxy_opt = "" if opts.proxy_port != None: proxy_opt = "-D " + opts.proxy_port subprocess.check_call("""ssh -o StrictHostKeyChecking=no -i %s %s ubuntu@%s \"export PATH=$PATH:/opt/hadoop-1.2.1/bin; export CLASSPATH=$CLASSPATH:.:\`hadoop classpath\`; export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64/; alias mpiexec='mpiexec.openmpi -hostfile ~/machines -x CLASSPATH -x JAVA_HOME'; /home/ubuntu/graphlab/scripts/ec2_tools/setup-hadoop\"""" % (opts.identity_file, proxy_opt, master), shell=True) elif action == "check-hadoop": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) master = master_nodes[0].public_dns_name print "Checking hadoop on master " + master + "..." proxy_opt = "" if opts.proxy_port != None: proxy_opt = "-D " + opts.proxy_port subprocess.check_call("""ssh -o StrictHostKeyChecking=no -i %s %s ubuntu@%s \"export PATH=$PATH:/opt/hadoop-1.2.1/bin; export CLASSPATH=$CLASSPATH:.:\`hadoop classpath\`; export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64/; jps\"""" % (opts.identity_file, proxy_opt, master), shell=True) elif action == "stop-hadoop": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) master = master_nodes[0].public_dns_name print "Staring hadoop on master " + master + "..." 
proxy_opt = "" if opts.proxy_port != None: proxy_opt = "-D " + opts.proxy_port subprocess.check_call("""ssh -o StrictHostKeyChecking=no -i %s %s ubuntu@%s \"export PATH=$PATH:/opt/hadoop-1.2.1/bin; export CLASSPATH=$CLASSPATH:.:\`hadoop classpath\`; export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64/; alias mpiexec='mpiexec -hostfile ~/machines -x CLASSPATH'; /home/ubuntu/graphlab/deps/hadoop/src/hadoop/bin/stop-all.sh\"""" % (opts.identity_file, proxy_opt, master), shell=True) elif action == "als_demo": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) master = master_nodes[0].public_dns_name print "Running ALS demo on master " + master + "..." proxy_opt = "" download_dataset = "rm -fR smallnetflix; mkdir smallnetflix; cd smallnetflix/; wget -q http://graphlab.org/wp-content/uploads/2013/07/smallnetflix_mm.validate.gz; wget http://graphlab.org/wp-content/uploads/2013/07/smallnetflix_mm.train_.gz; gunzip *.gz; mv smallnetflix_mm.train_ smallnetflix_mm.train;cd ..;" if opts.proxy_port != None: proxy_opt = "-D " + opts.proxy_port subprocess.check_call("""ssh -o StrictHostKeyChecking=no -i %s %s ubuntu@%s \" cd graphlab/release/toolkits/collaborative_filtering/; %s mpiexec.openmpi -hostfile ~/machines -n %d /home/ubuntu/graphlab/release/toolkits/collaborative_filtering/als --matrix /home/ubuntu/graphlab/release/toolkits/collaborative_filtering/smallnetflix/ --max_iter=5 --ncpus=%d --predictions=out_predictions --minval=1 --maxval=5 --D=100; \"""" % (opts.identity_file, proxy_opt, master, ("" if opts.resume else download_dataset), opts.slaves+1,compilation_threads), shell=True) elif action == "pagerank_demo": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) master = master_nodes[0].public_dns_name print "Running pagerank demo on master " + master + "..." 
proxy_opt = "" download_dataset = "rm -fR livejournal; mkdir livejournal; cd livejournal/; wget -q http://snap.stanford.edu/data/soc-LiveJournal1.txt.gz; gunzip *.gz; cd ..;" if opts.proxy_port != None: proxy_opt = "-D " + opts.proxy_port subprocess.check_call("""ssh -o StrictHostKeyChecking=no -i %s %s ubuntu@%s \" cd /home/ubuntu/graphlab/release/toolkits/graph_analytics/; %s mpiexec.openmpi -hostfile ~/machines -n %d /home/ubuntu/graphlab/release/toolkits/graph_analytics/pagerank --graph=/home/ubuntu/graphlab/release/toolkits/graph_analytics/livejournal/ --format=tsv --ncpus=%d --iterations=5 ; \"""" % (opts.identity_file, proxy_opt, master,("" if opts.resume else download_dataset), opts.slaves+1,compilation_threads), shell=True) elif action == "svd_demo": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) master = master_nodes[0].public_dns_name print "Running SVD demo on master " + master + "..." proxy_opt = "" download_dataset = "rm -fR livejournal; mkdir livejournal; cd livejournal/; wget -q http://snap.stanford.edu/data/soc-LiveJournal1.txt.gz; gunzip *.gz; cd ..;" if opts.proxy_port != None: proxy_opt = "-D " + opts.proxy_port subprocess.check_call("""ssh -o StrictHostKeyChecking=no -i %s %s ubuntu@%s \" cd graphlab/release/toolkits/collaborative_filtering/; %s mpiexec.openmpi -hostfile ~/machines -n %d /home/ubuntu/graphlab/release/toolkits/collaborative_filtering/svd --matrix /home/ubuntu/graphlab/release/toolkits/collaborative_filtering/livejournal --rows=4847572 --cols=4847571 --nsv=2 --nv=7 --max_iter=3 --tol=1e-2 --binary=true --save_vectors=1 --ncpus=%d --input_file_offset=0 --ortho_repeats=1 ; \"""" % (opts.identity_file, proxy_opt, master, ("" if opts.resume else download_dataset), opts.slaves+1, compilation_threads), shell=True) elif action == "update": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) master = master_nodes[0].public_dns_name print "Running software 
update on master " + master + "..." proxy_opt = "" if opts.proxy_port != None: proxy_opt = "-D " + opts.proxy_port scp(master, opts, "machines", '~/machines') subprocess.check_call("""ssh -o StrictHostKeyChecking=no -i %s %s ubuntu@%s \"export PATH=$PATH:/bin/hadoop-1.2.1/bin/; export CLASSPATH=$CLASSPATH:.:`/bin/hadoop-1.2.1/bin/hadoop classpath`; export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64/; alias mpiexec='mpiexec -hostfile ~/machines -x CLASSPATH'; sudo chmod -R a+rx /home/ubuntu/graphlab/deps/hadoop/; #DB: ugly, but sovles libhdfs bug cd graphlab/; git pull; ./configure; cd release/toolkits/collaborative_filtering/; make -j %d; cd ../graph_analytics/; make -j %d; cd ~/graphlab/release/toolkits; bash -x ~/graphlab/scripts/mpirsync \"""" % (opts.identity_file, proxy_opt, master, compilation_threads, compilation_threads), shell=True) elif action == "update-dbg": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) master = master_nodes[0].public_dns_name print "Running software update on master " + master + "..." 
proxy_opt = "" if opts.proxy_port != None: proxy_opt = "-D " + opts.proxy_port subprocess.check_call("""ssh -o StrictHostKeyChecking=no -i %s %s ubuntu@%s \" sudo apt-get install gdb; cd graphlab/; hg pull; hg update; ./configure; cd debug; make; cd ~/graphlab/debug/toolkits; ~/graphlab/scripts/mpirsync \"""" % (opts.identity_file, proxy_opt, master), shell=True) elif action == "get-master": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster(conn, opts, cluster_name) print master_nodes[0].public_dns_name elif action == "stop": response = raw_input("Are you sure you want to stop the cluster " + cluster_name + "?\nDATA ON EPHEMERAL DISKS WILL BE LOST, " + "BUT THE CLUSTER WILL KEEP USING SPACE ON\n" + "AMAZON EBS IF IT IS EBS-BACKED!!\n" + "Stop cluster " + cluster_name + " (y/N): ") if response == "y": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) print "Stopping master..." for inst in master_nodes: if inst.state not in ["shutting-down", "terminated"]: inst.stop() print "Stopping slaves..." for inst in slave_nodes: if inst.state not in ["shutting-down", "terminated"]: inst.stop() if zoo_nodes != []: print "Stopping zoo..." for inst in zoo_nodes: if inst.state not in ["shutting-down", "terminated"]: inst.stop() elif action == "start": (master_nodes, slave_nodes, zoo_nodes) = get_existing_cluster( conn, opts, cluster_name) print "Starting slaves..." for inst in slave_nodes: if inst.state not in ["shutting-down", "terminated"]: inst.start() print "Starting master..." for inst in master_nodes: if inst.state not in ["shutting-down", "terminated"]: inst.start() if zoo_nodes != []: print "Starting zoo..." 
for inst in zoo_nodes: if inst.state not in ["shutting-down", "terminated"]: inst.start() wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes, zoo_nodes) setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, cluster_name, False) elif action == "attach-ebs": success = attach_ebs(conn, opts, cluster_name) if success: print "ebs has been attached to masternode at /dev/sdh." elif action == "detach-ebs": success = detach_ebs(conn, opts, cluster_name) if success: print "ebs has been detached." else: print >> stderr, "Invalid action: %s" % action sys.exit(1) if __name__ == "__main__": logging.basicConfig() main() ================================================ FILE: scripts/ec2/readme ================================================ *The new ec2 aims to simplify the procedure of launching EC2 nodes with ready-to-go GraphLab environment. **The scripts are adapted from Spark's ec2 script. This document is also a variant of Spark's EC2 Script document at https://github.com/mesos/spark/wiki/EC2-Scripts Before you start: Create an Amazon EC2 key pair for yourself. This can be done by logging into your Amazon Web Services account through the AWS console, clicking Key Pairs on the left sidebar, and creating and downloading a key. Make sure that you set the permissions for the private key file to 600 (i.e. only you can read and write it) so that ssh will work. Whenever you want to use the gl-ec2 script, set the environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY to your Amazon EC2 access key ID and secret access key. These can be obtained from the AWS homepage by clicking Account > Security Credentials > Access Credentials. Launching a Cluster Go into the scripts/ec2 directory in the release of GraphLab you downloaded. 
Run ./gl-ec2 -k <keypair> -i <key-file> -s <num-slaves> launch <cluster-name>, where <keypair> is the name of your EC2 key pair (that you gave it when you created it), <key-file> is the private key file for your key pair, <num-slaves> is the number of slave nodes to launch (try 1 at first), and <cluster-name> is the name to give to your cluster. You can also run ./gl-ec2 --help to see more usage options. The following options are worth pointing out: --ami={"std", "hpc", AMIID} can be used to specify the GraphLab AMI. The default is "std" for a standard cluster image, and "hpc" is for HPC cluster image. You can also specify your own AMIID. Notice that the ami you choose should be compatible with the instance type you use (see below). "std" and "hpc" refers to the latest ami we have. The ami id is stored on our s3 bucket named graphlabv2-ami. We can update the ami pointer there. --instance-type=<INSTANCE_TYPE> can be used to specify an EC2 instance type to use. The default type is m1.large (which has 2 cores and 7.5 GB RAM). Refer to the Amazon pages about EC2 instance types and EC2 pricing for information about other instance types. If you choose "hpc" as your ami above, you need to use cc type instance. --zone=<EC2_ZONE> can be used to specify an EC2 availability zone to launch instances in. Sometimes, you will get an error because there is not enough capacity in one zone, and you should try to launch in another. This happens mostly with the m1.large instance types; extra-large (both m1.xlarge and c1.xlarge) instances tend to be more available. --ebs-vol-size=GB will attach an EBS volume with a given amount of space to each node so that you can have a persistent HDFS cluster on your nodes across cluster restarts (see below). If one of your launches fails due to e.g. not having the right permissions on your private key file, you can run launch with the --resume option to restart the setup process on an existing cluster. 
--ebs-vol-id=<EBS_VOL_ID> can be used to specify an ebs volume to be attached or detached. The availability zone of the volume must be the same as your instances. Here are a few common use cases: 1. Start a 32 nodes (31 slaves + 1 master) standard cluster named "test": ./gl-ec2 -k graphlabkey -i ~/.ssh/graphlab.pem -s 31 launch test Or Start a 32 nodes hpc cluster named "test-hpc" : ./gl-ec2 -k graphlabkey -i ~/.ssh/graphlab.pem -s 31 --ami hpc --instance-type cc1.4xlarge launch test-hpc 2. You can see the ip address of the master node you just created by: ./gl-ec2 -k graphlabkey -i ~/.ssh/graphlab.pem get-master test 3. You can attach the ebs volume whose id is AAA to the master node by: ./gl-ec2 -k graphlabkey -i ~/.ssh/graphlab.pem --ebs-vol-id AAA attach-ebs test 4. Login the master node by: ./gl-ec2 -k graphlabkey -i ~/.ssh/graphlab.pem login test 5. After you are done, destroy the cluster: ./gl-ec2 -k graphlabkey -i ~/.ssh/graphlab.pem destroy test ================================================ FILE: scripts/ec2_tools/scatter ================================================ #!/bin/bash src_path=$(hostname):$1 dest_path=$2 echo "Copying $src_path to $dest_path" mpiexec.openmpi -hostfile ~/machines -nolocal -pernode scp -r $src_path $dest_path ================================================ FILE: scripts/ec2_tools/setup-hadoop ================================================ #!/bin/bash if [ ! -e ~/machines ]; then echo "A list of machines must be provided in ~/machines" echo "Exiting..." 
exit 1 fi #db: clean ssh known_hosts cache in case ip/name changed mpiexec.openmpi -hostfile ~/machines -pernode \ sudo rm -fR ~/.ssh/known_hosts namenode=`head -n 1 ~/machines` # first node is the master echo "Setting up config" echo -e \ "export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64/\n" > /bin/hadoop-1.2.1/conf/hadoop-env.sh cat /bin/hadoop-1.2.1/conf/hadoop-env.sh mpiexec.openmpi -hostfile ~/machines -pernode \ scp $namenode:/bin/hadoop-1.2.1/conf/hadoop-env.sh /bin/hadoop-1.2.1/conf/hadoop-env.sh echo "Setting up namenode information." echo " namenode: " $namenode # mpiexec.openmpi -hostfile ~/machines -pernode \ # ~/bin/set-namenode.sh $namenode /bin/hadoop-1.2.1/conf/core-site.xml echo -e \ '<?xml version="1.0"?>\n'\ '<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>\n'\ '<configuration>\n'\ ' <property>\n'\ ' <name>fs.default.name</name>\n'\ ' <value>hdfs://'$namenode'/</value>\n'\ ' </property>\n'\ ' <property>\n'\ ' <name>hadoop.tmp.dir</name>\n'\ ' <value>/mnt/hadoop/tmp</value>\n'\ ' </property>\n'\ '</configuration>\n' > /bin/hadoop-1.2.1/conf/core-site.xml cat /bin/hadoop-1.2.1/conf/core-site.xml mpiexec.openmpi -hostfile ~/machines -pernode \ scp $namenode:/bin/hadoop-1.2.1/conf/core-site.xml /bin/hadoop-1.2.1/conf/core-site.xml echo -e \ '<?xml version="1.0"?>\n' \ '<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>\n' \ '<configuration>\n' \ ' <property>\n' \ ' <name>mapred.job.tracker</name>\n' \ ' <value>'$namenode:19001'</value>\n' \ ' </property>\n' \ ' <property>\n' \ ' <name>mapred.local.dir</name>\n' \ ' <value>/mnt/hadoop/mapred</value>\n' \ ' </property>\n' \ ' <property> \n' \ ' <name>mapred.tasktracker.map.tasks.maximum</name>\n' \ ' <value>32</value> \n' \ ' </property> \n' \ ' <property> \n' \ ' <name>mapred.tasktracker.reduce.tasks.maximum</name>\n' \ ' <value>32</value> \n' \ ' </property> \n' \ ' <property> \n' \ ' <name>mapred.reduce.tasks</name>\n' \ ' <value>4</value> \n' \ ' </property> \n' \ ' <property> 
\n' \ ' <name>mapred.child.java.bins</name>\n' \ ' <value>-Xmx1500m</value> \n' \ ' </property> \n' \ '</configuration>\n' > /bin/hadoop-1.2.1/conf/mapred-site.xml cat /bin/hadoop-1.2.1/conf/mapred-site.xml mpiexec.openmpi -hostfile ~/machines -pernode \ scp $namenode:/bin/hadoop-1.2.1/conf/mapred-site.xml /bin/hadoop-1.2.1/conf/mapred-site.xml echo -e \ '<?xml version="1.0"?>\n' \ '<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>\n' \ '<configuration>\n' \ ' <property>\n' \ ' <name>dfs.name.dir</name>\n' \ ' <value>/mnt/hadoop/nn</value>\n' \ ' </property>\n' \ ' <property>\n' \ ' <name>dfs.data.dir</name>\n' \ ' <value>/mnt/hadoop/hdfs</value>\n' \ ' </property>\n' \ ' <property> \n' \ ' <name>dfs.replication</name>\n' \ ' <value>3</value> \n' \ ' </property> \n' \ '</configuration>\n' > /bin/hadoop-1.2.1/conf/hdfs-site.xml cat /bin/hadoop-1.2.1/conf/hdfs-site.xml mpiexec.openmpi -hostfile ~/machines -pernode \ scp $namenode:/bin/hadoop-1.2.1/conf/hdfs-site.xml /bin/hadoop-1.2.1/conf/hdfs-site.xml echo $namenode > /bin/hadoop-1.2.1/conf/masters mpiexec.openmpi -hostfile ~/machines -pernode \ scp $namenode:/bin/hadoop-1.2.1/conf/masters /bin/hadoop-1.2.1/conf/masters echo "Creating data directories in /mnt" echo " /mnt/tmp " echo " /mnt/hadoop " # mpiexec.openmpi -hostfile ~/machines -pernode \ # sudo rm -rf /mnt/hadoop/hdfs # mpiexec.openmpi -hostfile ~/machines -pernode \ # sudo rm -rf /mnt/hadoop/nn #DB: clean old hadoop files in case they are there mpiexec.openmpi -hostfile ~/machines -pernode \ sudo rm -fR /mnt/hadoop/ mpiexec.openmpi -hostfile ~/machines -pernode \ sudo mkdir /mnt/hadoop/ mpiexec.openmpi -hostfile ~/machines -pernode \ sudo mkdir /mnt/hadoop/hdfs mpiexec.openmpi -hostfile ~/machines -pernode \ sudo mkdir /mnt/hadoop/mapred mpiexec.openmpi -hostfile ~/machines -pernode \ sudo mkdir /mnt/hadoop/tmp # mpiexec.openmpi -hostfile ~/machines -pernode \ # sudo mkdir /mnt/hadoop/nn mpiexec.openmpi -hostfile ~/machines -pernode \ sudo chown 
-R ubuntu:users /mnt/hadoop/
mpiexec.openmpi -hostfile ~/machines -pernode \
  sudo rm -fR /mnt/tmp/
mpiexec.openmpi -hostfile ~/machines -pernode \
  sudo mkdir /mnt/tmp
mpiexec.openmpi -hostfile ~/machines -pernode \
  sudo chown -R ubuntu:users /mnt/tmp

echo "Machines file: "
cat ~/machines

# Set each node's hostname to the name listed in ~/machines so that
# name resolution stays consistent across MPI and Hadoop.
while read line
do
  echo "Setting host name to $line"
  mpiexec.openmpi -host $line -pernode sudo hostname $line
done < ~/machines

export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64/jre/
/bin/hadoop-1.2.1/bin/hadoop namenode -format

echo "Starting the dfs:"
/bin/hadoop-1.2.1/bin/start-dfs.sh
echo "Starting map reduce:"
/bin/hadoop-1.2.1/bin/start-mapred.sh

================================================
FILE: scripts/ec2_tools/setup-torque
================================================
#!/bin/bash
if [ ! -e ~/machines ]; then
  echo "A list of machines must be provided in ~/machines"
  echo "Exiting..."
  exit 1
fi

namenode=$(hostname)

echo "Install Torque"
mpiexec.openmpi -hostfile ~/machines -pernode \
  sudo apt-get --yes install torque-server torque-scheduler \
  torque-client torque-common torque-mom

echo "Configuring Server"
sudo bash -c "echo $namenode > /etc/torque/server_name"
sudo cp ~/machines /var/spool/torque/server_priv/nodes
sudo qterm
sudo pbs_server

echo "Configuring MOM on remote machines"
echo "\$pbsserver $namenode" > /tmp/config_mom
mpiexec.openmpi -hostfile ~/machines -pernode \
  scp $namenode:/tmp/config_mom /tmp/config
mpiexec.openmpi -hostfile ~/machines -pernode \
  sudo cp /tmp/config /var/spool/torque/mom_priv/.
mpiexec.openmpi -hostfile ~/machines -pernode sudo momctl -s
mpiexec.openmpi -hostfile ~/machines -pernode sudo pbs_mom

echo "Configuring queue manager"
sudo qmgr -c 'create queue batch'
sudo qmgr -c 'set queue batch queue_type = Execution'
sudo qmgr -c 'set queue batch resources_default.nodes = 1'
sudo qmgr -c 'set queue batch enabled = True'
sudo qmgr -c 'set queue batch started = True'
sudo qmgr -c 'set server scheduling = True'
sudo qmgr -c 'set server default_queue = batch'
sudo qmgr -c 'set server log_events = 511'

================================================
FILE: scripts/install_graphlab.sh
================================================
#!/bin/bash
# BUG FIX: the repository is named "graphlabapi", so a bare clone creates
# ./graphlabapi and the "cd graphlab" below would fail.  Clone into an
# explicit "graphlab" directory instead.
git clone https://code.google.com/p/graphlabapi/ graphlab
cd graphlab
./configure | tee install_configure_log.txt
cd release
make -j2 | tee ../v2_debug_log.txt

================================================
FILE: scripts/license_prepend.sh
================================================
DIR="$( cd -P "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
echo $DIR
if ! grep -q Apache $1
then
  echo $1
  cat $DIR/../license/LICENSE_prepend.txt $1 > /tmp/out
  mv /tmp/out $1
fi

================================================
FILE: scripts/linux_run_script_no_jvm_template.sh
================================================
#!/bin/bash
# This script will run the program in the same location and with the same
# name as this script (without the .sh). Passing it the same set of command
# line options.
# It also parses the following environment variables
# JAVA_HOME
#   Either JAVA_HOME or JVM_SO_PATH must be set.
#   This must point to the Java home directory.
#   For instance: /usr/lib/jvm/java-6-openjdk
#   This was tested with Oracle's implementation of Java (sun-jdk or open-jdk).
# JVM_SO_PATH
#   Either JAVA_HOME or JVM_SO_PATH must be set.
#   The script will expect to find libjvm.so in
#   $JAVA_HOME/jre/lib/amd64/client/libjvm.so or
#   $JAVA_HOME/jre/lib/amd64/server/libjvm.so
#   If libjvm.so is not in either locations, the script will fail. In which
#   case, you should set this variable to the directory containing libjvm.so.
# USE_SYSTEM_LIBS
#   Optional. If set to 1, the system's glibc (and other system dependencies)
#   will be used instead of the provided versions.

# get the program name. By convention we will make it so that the
# script's name is the same as the program name. But with a ".sh" at the end
PROG=$0
#strip the ".sh" at the end of the script name
PROG=${PROG%.sh}
PROGDIR=`dirname $0`

JVM_PATH=""
HAS_HADOOP=0

if [ -z $USE_SYSTEM_LIBS ]; then
  USE_SYSTEM_LIBS=0
fi

if [ "$USE_SYSTEM_LIBS" -eq "1" ]; then
  echo "Using system libs."
  #using system libs
  $PROG $*
else
  # now. where do I find the dependency directory?
  # lets for now... assume that the directory organization must be
  # /gldeps
  # /toolkits/blah
  # /toolkits/otherblah
  DEPDIR="$PROGDIR/../../gldeps"
  LIBPATH=$DEPDIR
  if [ ! -z "$JVM_PATH" ]; then
    LIBPATH=$LIBPATH:$JVM_PATH
  fi
  if [ ! -z "$LD_LIBRARY_PATH" ]; then
    LIBPATH=$LIBPATH:$LD_LIBRARY_PATH
  fi
  $DEPDIR/ld-linux-x86-64.so.2 --library-path $LIBPATH $PROG $*
fi

================================================
FILE: scripts/linux_run_script_template.sh
================================================
#!/bin/bash
# This script will run the program in the same location and with the same
# name as this script (without the .sh). Passing it the same set of command
# line options.
# It also parses the following environment variables
# JAVA_HOME
#   Either JAVA_HOME or JVM_SO_PATH must be set.
#   This must point to the Java home directory.
#   For instance: /usr/lib/jvm/java-6-openjdk
#   This was tested with Oracle's implementation of Java (sun-jdk or open-jdk).
# JVM_SO_PATH
#   Either JAVA_HOME or JVM_SO_PATH must be set.
#   The script will expect to find libjvm.so in
#   $JAVA_HOME/jre/lib/amd64/client/libjvm.so or
#   $JAVA_HOME/jre/lib/amd64/server/libjvm.so
#   If libjvm.so is not in either locations, the script will fail. In which
#   case, you should set this variable to the directory containing libjvm.so.
# USE_SYSTEM_LIBS # Optional. If set to 1, the system's glibc (and other system dependencies) # will be used instead of the provided versions. # get the program name. By convention we will make it so that the # script's name is the same as the program name. But with a ".sh" at the end PROG=$0 #strip the ".sh" at the end of the script name PROG=${PROG%.sh} PROGDIR=`dirname $0` # ok... now we build the command line. # If JVM_SO_PATH is set it takes priority. JVM_PATH="" if [ ! -z "$JVM_SO_PATH" ]; then JVM_PATH=$JVM_SO_PATH if [ -a "$JVM_SO_PATH/libjvm.so" ]; then echo 'libjvm.so not found in $JVM_SO_PATH/libjvm.so' echo "We are going to try to run anyway. This may not work correctly." fi elif [ ! -z "$JAVA_HOME" ]; then if [ -a "$JAVA_HOME/jre/lib/amd64/client/libjvm.so" ]; then JVM_PATH="$JAVA_HOME/jre/lib/amd64/client" elif [ -a "$JAVA_HOME/jre/lib/amd64/server/libjvm.so" ]; then JVM_PATH="$JAVA_HOME/jre/lib/amd64/server" else echo 'libjvm.so not found in either $JAVA_HOME/jre/lib/amd64/server' echo 'or $JAVA_HOME/jre/lib/amd64/client' echo "We are going to try to run anyway. This may not work correctly." fi else echo "Neither JVM_PATH or JAVA_HOME is set." echo "We are going to try to run anyway. This may not work correctly." fi # probe for hadoop command HAS_HADOOP=1 hadoop classpath > /dev/null 2>&1 || { echo "hadoop command not found. HDFS unavailable" HAS_HADOOP=0 } if [ -z $USE_SYSTEM_LIBS ]; then USE_SYSTEM_LIBS=0 fi if [ "$USE_SYSTEM_LIBS" -eq "1" ]; then echo "Using system libs." #using system libs if [ $HAS_HADOOP -eq 1 ]; then env LD_LIBRARY_PATH=$JVM_PATH:$LD_LIBRARY_PATH CLASSPATH=`hadoop classpath` $PROG $* else env LD_LIBRARY_PATH=$JVM_PATH:$LD_LIBRARY_PATH $PROG $* fi else # now. where do I find the dependency directory? # lets for now... assume that the directory organization must be # /gldeps # /toolkits/blah # /toolkits/otherblah DEPDIR="$PROGDIR/../../gldeps" LIBPATH=$DEPDIR if [ ! -z "$JVM_PATH" ]; then LIBPATH=$LIBPATH:$JVM_PATH fi if [ ! 
-z "$LD_LIBRARY_PATH" ]; then LIBPATH=$LIBPATH:$LD_LIBRARY_PATH fi if [ $HAS_HADOOP -eq 1 ]; then env CLASSPATH=`hadoop classpath` $DEPDIR/ld-linux-x86-64.so.2 --library-path $LIBPATH $PROG $* else $DEPDIR/ld-linux-x86-64.so.2 --library-path $LIBPATH $PROG $* fi fi ================================================ FILE: scripts/make_all_docs.sh ================================================ doxygen doxygen Doxyfile_internal ================================================ FILE: scripts/make_dist.sh ================================================ #!/bin/bash major_version=2.1 echo "THIS MUST BE RUN IN GRAPHLAB HOME" ## JOEY: WHY ARE WE REMOVING THE FOLDER AND THEN USING RSYNC? rm -fR dist/graphlabapi mkdir -p dist/graphlabapi rsync -vv -al --delete --delete-excluded \ --exclude=/debug --exclude=/release --exclude=/profile --exclude=/apps \ --exclude=.hg --exclude=/matlab \ --exclude=/dist --exclude=/deps --exclude=*~ --exclude=*.orig --exclude=/configure.deps \ --exclude /make_dist --exclude /BINARY_README * dist/graphlabapi/. mkdir dist/graphlabapi/apps cp dist/graphlabapi/demoapps/CMakeLists.txt dist/graphlabapi/apps/ version=`hg summary | grep parent | sed 's/parent: //g' | sed 's/:.*//g'` version="v${major_version}.$version" echo "Version: $version" cd dist tar -vz \ -cf graphlabapi_${version}.tar.gz \ graphlabapi cd .. ls -al dist | tail -n 1 ================================================ FILE: scripts/mpi_redirect_stdout.sh ================================================ #!/bin/bash if [ ! -z "$PMI_RANK" ]; then RANK=$PMI_RANK elif [ ! -z "$OMPI_COMM_WORLD_RANK" ]; then RANK=$OMPI_COMM_WORLD_RANK else echo "Unable to figure out MPI Rank!" 
exit 1 fi #echo $RANK $* 2>&1 | tee out.$RANK ================================================ FILE: scripts/mpirsync ================================================ #!/bin/bash src_path=$(hostname):$PWD dest_path=$PWD mpiexec.openmpi -hostfile ~/machines -nolocal -pernode mkdir -p $dest_path mpiexec.openmpi -hostfile ~/machines -nolocal -pernode rsync -e 'ssh -o StrictHostKeyChecking=no -i /home/ubuntu/.ssh/id_rsa' -avz --exclude '*.make' --exclude '*.cmake' --exclude '*.internal' --exclude '*.includecache' --exclude '*.o' $src_path/ $dest_path ================================================ FILE: scripts/rpcexec.py ================================================ #!/usr/bin/python import sys import os import string import subprocess import time """ Usage: rpcexec -n n_to_start -f [hostsfile] [program] [options] To start local only: rpcexec [program] [options] """ def escape(s): s = string.replace(s, '"', '\\"') s = string.replace(s, "'", "\\'") return s #enddef # gui: if xterm should run # machines: a vector of all the machines # port: a vector of the port number for ssh to connect to. 
must be same length as machines # machineid: The machineid to generate # prog: program to run # opts: options for the program def get_ssh_cmd(gui, machines, port, machineid, prog, opts): allmachines = '"' + string.join(machines, ',') + '"' # construct the command line cwd = os.getcwd() if (gui): sshcmd = 'ssh -X -Y -n -q ' else: sshcmd = 'ssh -n -q ' #endif guicmd = '' if (gui): guicmd = 'xterm -geometry 120x60 -e ' #endif if (machines[i] == "localhost" or machines[i].startswith("127.")): cmd = 'env SPAWNNODES=%s SPAWNID=%d %s %s' % (allmachines,i, prog, opts) elif (port[i] == 22): cmd = sshcmd + '%s "cd %s ; env SPAWNNODES=%s SPAWNID=%d %s %s %s"' % \ (machines[machineid], escape(cwd), escape(allmachines),machineid, \ guicmd, escape(prog), escape(opts)) else: cmd = sshcmd + '-oPort=%d %s "cd %s ; env SPAWNNODES=%s SPAWNID=%d %s %s %s"' % \ (port[machineid], machines[machineid], escape(cwd), escape(allmachines), \ machineid, guicmd, escape(prog), escape(opts)) #endif return cmd #enddef def get_screen_cmd(gui, machines, port, machineid, prog, opts): allmachines = '"' + string.join(machines, ',') + '"' # construct the command line cwd = os.getcwd() sshcmd = 'ssh -t ' #endif guicmd = '' if (machines[i] == "localhost" or machines[i].startswith("127.")): cmd = ['export SPAWNNODES=%s SPAWNID=%d ; %s %s' % (allmachines,i, prog, opts)] elif (port[i] == 22): cmd = [sshcmd + '%s "cd %s ; export SPAWNNODES=%s SPAWNID=%d; %s %s %s ; bash -il"' % \ (machines[machineid], escape(cwd), escape(allmachines),machineid, \ guicmd, escape(prog), escape(opts))] else: cmd = [sshcmd + '-oPort=%d %s "cd %s ; export SPAWNNODES=%s SPAWNID=%d; %s %s %s ; bash -il"' % \ (port[machineid], machines[machineid], escape(cwd), escape(allmachines), \ machineid, guicmd, escape(prog), escape(opts))] #endif return cmd #enddef def shell_popen(cmd): print cmd return subprocess.Popen(cmd, shell=True) #endif def shell_wait_native(cmd): print cmd pid = subprocess.Popen(cmd, shell=True) os.waitpid(pid.pid, 0) 
#time.sleep(0.5) #endif nmachines = 0 hostsfile = '' prog = '' opts = '' gui = 0 inscreen = 0 screenname = '' printhelp = 0 i = 1 while(i < len(sys.argv)): if sys.argv[i] == '-h' or sys.argv[i] == '--help': printhelp = 1 break elif sys.argv[i] == '-n': nmachines = int(sys.argv[i+1]) i = i + 2 elif sys.argv[i] == '-f': hostsfile = sys.argv[i+1] i = i + 2 elif sys.argv[i] == '-g': gui = 1 i = i + 1 elif sys.argv[i] == '-s': inscreen = 1 screenname = sys.argv[i+1] i = i + 2 else: prog = sys.argv[i] if (len(sys.argv) > i+1): opts = string.join(sys.argv[(i+1):]) #endif break #endif #endwhile if inscreen and gui: print ("-s and -g are mutually exclusive") exit(0) #endif if (printhelp): print print("Usage: rpcexec -n [n_to_start] -f [hostsfile] [program] [options]") print("To start local only: rpcexec [program] [options]") print("Optional Arguments:") print("-g: Launch the command within Xterm on all machines. ") print("-s [screenname] : Launch a screen session and launch the") print(" commands in each window in each window. 
Any ssh connections") print(" are preserved on termination of the program with environment") print(" properly set up for subsequent executions") print("") print("Note: -s [screenname] and -g are mutually exclusive") exit(0) #endif if (nmachines == 0 and hostsfile == ''): cmd = 'env SPAWNNODES=localhost SPAWNID=0 %s %s' % (prog, opts) p = shell_popen(cmd) os.waitpid(p.pid, 0) exit(0) #endif print('Starting ' + str(nmachines) + ' machines') print('Hosts file: ' + hostsfile) print('Command Line to run: ' + prog + ' ' + opts) # open the hosts file and read the machines try: f = open(hostsfile, 'r') except: print print("Unable to open hosts file") print exit(0) #endtry machines = [''] * nmachines port = [22] * nmachines for i in range(nmachines): try: machines[i] = string.strip(f.readline()) colonsplit = string.split(machines[i], ':') if (len(colonsplit) == 2): machines[i] = string.strip(colonsplit[0]) port[i] = int(colonsplit[1]) #endif except: print print("Unable to read line " + str(i+1) + " of hosts file") print exit(0) #endfor f.close() # the commands to run to start for each node cmd = [None] * nmachines for i in range(nmachines): if (inscreen == 0): cmd[i] = get_ssh_cmd(gui, machines, port, i, prog, opts) else: cmd[i] = get_screen_cmd(gui, machines, port, i, prog, opts) print cmd[i] #endif #endfor if (inscreen == 0): # now issue the ssh commands procs = [None] * nmachines for i in range(nmachines): procs[i] = shell_popen(cmd[i]) #endfor for i in range(nmachines): os.waitpid(procs[i].pid, 0) #endfor else: # create a new empty screen with the screen name shell_wait_native("screen -h 10000 -d -m -S " + screenname) shell_wait_native("screen -h 10000 -x %s -p 0 -X title %s" % (screenname, machines[0][0:8])) # start a bunch of empty screens for i in range(nmachines - 1): shell_wait_native("screen -x %s -X screen -t %s" % (screenname, machines[i+1][0:8])) #endfor # set the titles in each one and run the program # we stripe it across windows so if there are ssh commands 
they will # have time to finish running first for j in range(2): for i in range(nmachines): if (len(cmd[i]) > j and cmd[i][j] != None): shell_wait_native("screen -x %s -p %d -X stuff %s" % (screenname, i, "'"+cmd[i][j]+"\n'")) #endif #endfor #endfor #endif ================================================ FILE: scripts/test_dist.sh ================================================ #!/bin/sh #script for auto test graphlab distribution, written by danny bickson if [ $# -ne 1 ]; then echo "Usage: $0 <release number>" fi rm -fR /tmp/graphlabapi* cp dist/graphlabapi_v1_$1.tar.gz /tmp/ cd /tmp/ tar xvzf graphlabapi_v1_$1.tar.gz cd /tmp/graphlabapi ./configure --bootstrap --yes cd release make -j8 cd tests ./runtests.sh ================================================ FILE: src/CMakeLists.txt ================================================ project(GraphLab) subdirs(graphlab) ================================================ FILE: src/graphlab/CMakeLists.txt ================================================ project(GraphLab) # link_libraries(${Boost_LIBRARIES}) subdirs( # aggregation engine graph options parallel scheduler util serialization logger ui # jni ) #build the graphlab library add_library(graphlab STATIC options/command_line_options.cpp options/options_map.cpp util/timer.cpp util/generics/any.cpp util/hdfs.cpp logger/logger.cpp logger/backtrace.cpp parallel/pthread_tools.cpp # parallel/qthread_tools.cpp parallel/thread_pool.cpp parallel/fiber_control.cpp parallel/fiber_group.cpp util/random.cpp scheduler/scheduler_list.cpp scheduler/fifo_scheduler.cpp scheduler/priority_scheduler.cpp scheduler/sweep_scheduler.cpp scheduler/queued_fifo_scheduler.cpp util/net_util.cpp util/safe_circular_char_buffer.cpp util/fs_util.cpp util/memory_info.cpp util/tracepoint.cpp util/mpi_tools.cpp util/web_util.cpp util/inplace_lf_queue.cpp zookeeper/zookeeper_common.cpp zookeeper/key_value.cpp zookeeper/server_list.cpp rpc/dc_tcp_comm.cpp rpc/circular_char_buffer.cpp 
rpc/dc_stream_receive.cpp rpc/dc_buffered_stream_send2.cpp rpc/dc.cpp rpc/request_reply_handler.cpp rpc/dc_init_from_env.cpp rpc/dc_init_from_mpi.cpp rpc/dc_init_from_zookeeper.cpp rpc/async_consensus.cpp rpc/fiber_async_consensus.cpp rpc/distributed_event_log.cpp rpc/delta_dht.cpp rpc/thread_local_send_buffer.cpp ui/mongoose/mongoose.cpp ui/metrics_server.cpp rpc/get_current_process_hash.cpp ) requires_core_deps(graphlab) # if(Sctp-FOUND) # set_property(TARGET graphlab APPEND PROPERTY COMPILE_FLAGS -DHAS_SCTP) # target_link_libraries(graphlab sctp) # endif() INSTALL(TARGETS graphlab ARCHIVE DESTINATION lib) ================================================ FILE: src/graphlab/aggregation/CMakeLists.txt ================================================ project(GraphLab) ================================================ FILE: src/graphlab/aggregation/aggregation_includes.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <graphlab/aggregation/iaggregator.hpp> ================================================ FILE: src/graphlab/aggregation/distributed_aggregator.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_DISTRIBUTED_AGGREGATOR #define GRAPHLAB_DISTRIBUTED_AGGREGATOR #ifndef __NO_OPENMP__ #include <omp.h> #endif #include <map> #include <set> #include <string> #include <vector> #include <graphlab/rpc/dc_dist_object.hpp> #include <graphlab/vertex_program/icontext.hpp> #include <graphlab/graph/distributed_graph.hpp> #include <graphlab/util/generics/conditional_addition_wrapper.hpp> #include <graphlab/util/generics/test_function_or_functor_type.hpp> #include <graphlab/util/generics/any.hpp> #include <graphlab/util/timer.hpp> #include <graphlab/util/mutable_queue.hpp> #include <graphlab/logger/assertions.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { /** * \internal * Implements a distributed aggregator interface which can be plugged * into the engine. This class includes management of periodic aggregators. * * Essentially, the engine should ideally pass-through all calls to * - add_vertex_aggregator() * - add_edge_aggregator() * - aggregate_now() * - aggregate_periodic() * * On engine start(), the engine should call aggregate_all_periodic() * to ensure all periodic aggregators are called once prior to vertex program * execution. After which, the start() function should be called to prepare * the state of the schedule. 
At termination of the engine, the stop() * function should be called to reset the state of the aggregator. * * During engine execution, two modes of operations are permitted: * synchronous, and asynchronous. In a synchronous mode of execution, * the tick_synchronous() function should be called periodically by * exactly one thread on each machine, at the same time. In an asynchronous * mode of execution, tick_asynchronous() should be called periodically * on each machine by some arbitrary thread. This polls the state of the * schedule and activates aggregation jobs which are ready. * * tick_synchronous() and tick_asynchronous() should not be used * simultaneously within the same engine execution . For details on their * usage, see their respective documentation. * */ template<typename Graph, typename IContext> class distributed_aggregator { public: typedef Graph graph_type; typedef typename graph_type::local_edge_list_type local_edge_list_type; typedef typename graph_type::local_edge_type local_edge_type; typedef typename graph_type::edge_type edge_type; typedef typename graph_type::local_vertex_type local_vertex_type; typedef typename graph_type::vertex_type vertex_type ; typedef IContext icontext_type; dc_dist_object<distributed_aggregator> rmi; graph_type& graph; icontext_type* context; private: /** * \internal * A base class which contains a "type-free" specification of the * reduction operation, thus allowing the aggregation to be performs at * runtime with no other type information whatsoever. 
*/ struct imap_reduce_base { /** \brief makes a copy of the current map reduce spec without copying * accumulator data */ virtual imap_reduce_base* clone_empty() const = 0; /** \brief Performs a map operation on the given vertex adding to the * internal accumulator */ virtual void perform_map_vertex(icontext_type&, vertex_type&) = 0; /** \brief Performs a map operation on the given edge adding to the * internal accumulator */ virtual void perform_map_edge(icontext_type&, edge_type&) = 0; /** \brief Returns true if the accumulation is over vertices. Returns false if it is over edges.*/ virtual bool is_vertex_map() const = 0; /** \brief Returns the accumulator stored in an any. (by some magic, any's can be serialized) */ virtual any get_accumulator() const = 0; /** \brief Combines accumulators using a second accumulator stored in an any (as returned by get_accumulator). Must be thread safe.*/ virtual void add_accumulator_any(any& other) = 0; /** \brief Sets the value of the accumulator from an any (as returned by get_accumulator). Must be thread safe.*/ virtual void set_accumulator_any(any& other) = 0; /** \brief Combines accumulators using a second accumulator stored in a second imap_reduce_base class). Must be thread safe. */ virtual void add_accumulator(imap_reduce_base* other) = 0; /** \brief Resets the accumulator */ virtual void clear_accumulator() = 0; /** \brief Calls the finalize operation on internal accumulator */ virtual void finalize(icontext_type&) = 0; virtual ~imap_reduce_base() { } }; template <typename ReductionType> struct default_map_types{ typedef ReductionType (*vertex_map_type)(icontext_type&, const vertex_type&); typedef ReductionType (*edge_map_type)(icontext_type&, const edge_type&); }; /** * \internal * A templated implementation of the imap_reduce_base above. * \tparam ReductionType The reduction type. 
(The type the map function * returns) */ template <typename ReductionType, typename VertexMapperType, typename EdgeMapperType, typename FinalizerType> struct map_reduce_type : public imap_reduce_base { conditional_addition_wrapper<ReductionType> acc; VertexMapperType map_vtx_function; EdgeMapperType map_edge_function; FinalizerType finalize_function; bool vertex_map; mutex lock; /** * \brief Constructor which constructs a vertex reduction */ map_reduce_type(VertexMapperType map_vtx_function, FinalizerType finalize_function) : map_vtx_function(map_vtx_function), finalize_function(finalize_function), vertex_map(true) { } /** * \brief Constructor which constructs an edge reduction. The last bool * is unused and allows for disambiguation between the two constructors */ map_reduce_type(EdgeMapperType map_edge_function, FinalizerType finalize_function, bool) : map_edge_function(map_edge_function), finalize_function(finalize_function), vertex_map(false) { } void perform_map_vertex(icontext_type& context, vertex_type& vertex) { /** * A compiler error on this line is typically due to the * aggregator map function not having the correct type. * * Verify that the map function has the following form: * * ReductionType mapfun(icontext_type& context, const vertex_type& vertex); * * It is also possible the accumulator type */ ReductionType temp = map_vtx_function(context, vertex); /** * A compiler error on this line is typically due to the * accumulator (ReductionType of the map function not having an * operator+=. Ensure that the following is available: * * ReductionType& operator+=(ReductionType& lvalue, * const ReductionType& rvalue); */ acc += temp; } // end of perform_map_vertex void perform_map_edge(icontext_type& context, edge_type& edge) { /** * A compiler error on this line is typically due to the * aggregator map function not having the correct type. 
* * Verify that the map function has the following form: * * ReductionType mapfun(icontext_type& context, const edge_type& vertex); * * It is also possible the accumulator type */ ReductionType temp = map_edge_function(context, edge); /** * A compiler error on this line is typically due to the * accumulator (ReductionType of the map function not having an * operator+=. Ensure that the following is available: * * ReductionType& operator+=(ReductionType& lvalue, * const ReductionType& rvalue); */ acc += temp; } // end of perform_map_edge bool is_vertex_map() const { return vertex_map; } any get_accumulator() const { return any(acc); } void add_accumulator_any(any& other) { lock.lock(); acc += other.as<conditional_addition_wrapper<ReductionType> >(); lock.unlock(); } void set_accumulator_any(any& other) { lock.lock(); acc = other.as<conditional_addition_wrapper<ReductionType> >(); lock.unlock(); } void add_accumulator(imap_reduce_base* other) { lock.lock(); acc += dynamic_cast<map_reduce_type*>(other)->acc; lock.unlock(); } void clear_accumulator() { acc.clear(); } void finalize(icontext_type& context) { finalize_function(context, acc.value); } imap_reduce_base* clone_empty() const { map_reduce_type* copy; if (is_vertex_map()) { copy = new map_reduce_type(map_vtx_function, finalize_function); } else { copy = new map_reduce_type(map_edge_function, finalize_function, true); } return copy; } }; std::map<std::string, imap_reduce_base*> aggregators; std::map<std::string, float> aggregate_period; struct async_aggregator_state { /// Performs reduction of all local threads. On machine 0, also /// accumulates for all machines. imap_reduce_base* root_reducer; /// Accumulator used for each thread std::vector<imap_reduce_base*> per_thread_aggregation; /// Count down the completion of the local machine threads atomic<int> local_count_down; /// Count down the completion of machines. 
Used only on machine 0 atomic<int> distributed_count_down; }; std::map<std::string, async_aggregator_state> async_state; float start_time; /* annoyingly the mutable queue is a max heap when I need a min-heap * to track the next thing to activate. So we need to keep * negative priorities... */ mutable_queue<std::string, float> schedule; mutex schedule_lock; size_t ncpus; template <typename ReductionType, typename F> static void test_vertex_mapper_type(std::string key = "") { bool test_result = test_function_or_const_functor_2<F, ReductionType(icontext_type&, const vertex_type&), ReductionType, icontext_type&, const vertex_type&>::value; if (!test_result) { std::stringstream strm; strm << "\n"; if (key.empty()) { strm << "Vertex Map Function does not pass strict runtime type checks. \n"; } else { strm << "Map Function in Vertex Aggregator " << key << " does not pass strict runtime type checks. \n"; } if (boost::is_function<typename boost::remove_pointer<F>::type>::value) { strm << "Function prototype should be \n" << "\t ReductionType f(icontext_type&, const vertex_type&)\n"; } else { strm << "Functor's operator() prototype should be \n" << "\t ReductionType operator()(icontext_type&, const vertex_type&) const\n"; } strm << "If you are not intentionally violating the abstraction," << " we recommend fixing your function for safety reasons"; strm.flush(); logstream(LOG_WARNING) << strm.str() << std::endl; } } template <typename ReductionType, typename F> static void test_edge_mapper_type(std::string key = "") { bool test_result = test_function_or_const_functor_2<F, ReductionType(icontext_type&, const edge_type&), ReductionType, icontext_type&, const edge_type&>::value; if (!test_result) { std::stringstream strm; strm << "\n"; if (key.empty()) { strm << "Edge Map Function does not pass strict runtime type checks. \n"; } else { strm << "Map Function in Edge Aggregator " << key << " does not pass strict runtime type checks. 
\n"; } if (boost::is_function<typename boost::remove_pointer<F>::type>::value) { strm << "Function prototype should be \n" << "\t ReductionType f(icontext_type&, const edge_type&)\n"; } else { strm << "Functor's operator() prototype should be " << "\t ReductionType operator()(icontext_type&, const edge_type&) const\n"; } strm << "If you are not intentionally violating the abstraction," << " we recommend fixing your function for safety reasons"; logstream(LOG_WARNING) << strm.str() << std::endl; } } public: distributed_aggregator(distributed_control& dc, graph_type& graph, icontext_type* context): rmi(dc, this), graph(graph), context(context), ncpus(0) { } /** * \copydoc graphlab::iengine::add_vertex_aggregator */ template <typename ReductionType, typename VertexMapperType, typename FinalizerType> bool add_vertex_aggregator(const std::string& key, VertexMapperType map_function, FinalizerType finalize_function) { if (key.length() == 0) return false; if (aggregators.count(key) == 0) { if (rmi.procid() == 0) { // do a runtime type check test_vertex_mapper_type<ReductionType, VertexMapperType>(key); } aggregators[key] = new map_reduce_type<ReductionType, VertexMapperType, typename default_map_types<ReductionType>::edge_map_type, FinalizerType>(map_function, finalize_function); return true; } else { // aggregator already exists. fail return false; } } #if defined(__cplusplus) && __cplusplus >= 201103L /** * \brief An overload of add_vertex_aggregator for C++11 which does not * require the user to provide the reduction type. * * This function is available only if the compiler has C++11 support. 
* Specifically, it uses C++11's decltype operation to infer the * reduction type, thus eliminating the need for the function */ template <typename VertexMapperType, typename FinalizerType> bool add_vertex_aggregator(const std::string& key, VertexMapperType map_function, FinalizerType finalize_function) { //typedef decltype(map_function(*context,graph.vertex(0))) ReductionType; typedef decltype(map_function(*context, graph.vertex(0))) ReductionType; if (key.length() == 0) return false; if (aggregators.count(key) == 0) { aggregators[key] = new map_reduce_type<ReductionType, VertexMapperType, typename default_map_types<ReductionType>::edge_map_type, FinalizerType>(map_function, finalize_function); return true; } else { // aggregator already exists. fail return false; } } #endif /** * \copydoc graphlab::iengine::add_edge_aggregator */ template <typename ReductionType, typename EdgeMapperType, typename FinalizerType> bool add_edge_aggregator(const std::string& key, EdgeMapperType map_function, FinalizerType finalize_function) { if (key.length() == 0) return false; if (aggregators.count(key) == 0) { if (rmi.procid() == 0) { // do a runtime type check test_edge_mapper_type<ReductionType, EdgeMapperType>(key); } aggregators[key] = new map_reduce_type<ReductionType, typename default_map_types<ReductionType>::vertex_map_type, EdgeMapperType, FinalizerType>(map_function, finalize_function, true); return true; } else { // aggregator already exists. fail return false; } } #if defined(__cplusplus) && __cplusplus >= 201103L /** * \brief An overload of add_edge_aggregator for C++11 which does not * require the user to provide the reduction type. * * This function is available only if the compiler has C++11 support. * Specifically, it uses C++11's decltype operation to infer the * reduction type, thus eliminating the need for the function * call to be templatized over the reduction type. 
*/ template <typename EdgeMapperType, typename FinalizerType> bool add_edge_aggregator(const std::string& key, EdgeMapperType map_function, FinalizerType finalize_function) { // an edge_type is actually hard to get typedef decltype(map_function(*context, edge_type(graph.l_vertex(0).in_edges()[0]) )) ReductionType; if (key.length() == 0) return false; if (aggregators.count(key) == 0) { aggregators[key] = new map_reduce_type<ReductionType, typename default_map_types<ReductionType>::vertex_map_type, EdgeMapperType, FinalizerType>(map_function, finalize_function, true); return true; } else { // aggregator already exists. fail return false; } } #endif /** * \copydoc graphlab::iengine::aggregate_now */ bool aggregate_now(const std::string& key) { ASSERT_MSG(graph.is_finalized(), "Graph must be finalized"); if (aggregators.count(key) == 0) { ASSERT_MSG(false, "Requested aggregator %s not found", key.c_str()); return false; } imap_reduce_base* mr = aggregators[key]; mr->clear_accumulator(); // ok. 
now we perform reduction on local data in parallel #ifdef _OPENMP #pragma omp parallel #endif { imap_reduce_base* localmr = mr->clone_empty(); if (localmr->is_vertex_map()) { #ifdef _OPENMP #pragma omp for #endif for (int i = 0; i < (int)graph.num_local_vertices(); ++i) { local_vertex_type lvertex = graph.l_vertex(i); if (lvertex.owner() == rmi.procid()) { vertex_type vertex(lvertex); localmr->perform_map_vertex(*context, vertex); } } } else { #ifdef _OPENMP #pragma omp for #endif for (int i = 0; i < (int)graph.num_local_vertices(); ++i) { foreach(local_edge_type e, graph.l_vertex(i).in_edges()) { edge_type edge(e); localmr->perform_map_edge(*context, edge); } } } #ifdef _OPENMP #pragma omp critical #endif { mr->add_accumulator(localmr); } delete localmr; } std::vector<any> gathervec(rmi.numprocs()); gathervec[rmi.procid()] = mr->get_accumulator(); rmi.gather(gathervec, 0); if (rmi.procid() == 0) { // machine 0 aggregates the accumulators // sums them together and broadcasts it for (procid_t i = 1; i < rmi.numprocs(); ++i) { mr->add_accumulator_any(gathervec[i]); } any val = mr->get_accumulator(); rmi.broadcast(val, true); } else { // all other machines wait for the broadcast value any val; rmi.broadcast(val, false); mr->set_accumulator_any(val); } mr->finalize(*context); mr->clear_accumulator(); gathervec.clear(); return true; } /** * \copydoc graphlab::iengine::aggregate_periodic */ bool aggregate_periodic(const std::string& key, float seconds) { rmi.barrier(); if (seconds < 0) return false; if (aggregators.count(key) == 0) return false; else aggregate_period[key] = seconds; return true; } /** * Performs aggregation on all keys registered with a period. * May be used on engine start() to ensure all periodic * aggregators are executed before engine execution. 
*/ void aggregate_all_periodic() { typename std::map<std::string, float>::iterator iter = aggregate_period.begin(); while (iter != aggregate_period.end()) { aggregate_now(iter->first); ++iter; } } /** * Must be called on engine start. Initializes the internal scheduler. * Must be called on all machines simultaneously. * ncpus is really only important for the asynchronous implementation. * It must be equal to the number of engine threads. * * \param [in] cpus Number of engine threads used. This is only necessary * if the asynchronous form is used. */ void start(size_t ncpus = 0) { rmi.barrier(); schedule.clear(); start_time = timer::approx_time_seconds(); typename std::map<std::string, float>::iterator iter = aggregate_period.begin(); while (iter != aggregate_period.end()) { // schedule is a max heap. To treat it like a min heap // I need to insert negative keys schedule.push(iter->first, -iter->second); ++iter; } this->ncpus = ncpus; // now initialize the asyncronous reduction states if(ncpus > 0) { iter = aggregate_period.begin(); while (iter != aggregate_period.end()) { async_state[iter->first].local_count_down = (int)ncpus; async_state[iter->first].distributed_count_down = (int)rmi.numprocs(); async_state[iter->first].per_thread_aggregation.resize(ncpus); for (size_t i = 0; i < ncpus; ++i) { async_state[iter->first].per_thread_aggregation[i] = aggregators[iter->first]->clone_empty(); } async_state[iter->first].root_reducer = aggregators[iter->first]->clone_empty(); ++iter; } } } /** * If asynchronous aggregation is desired, this function is * to be called periodically on each machine. This polls the schedule to * check if there is an aggregator which needs to be activated. If there * is an aggregator to be started, this function will return a non empty * string. This function is thread reentrant and each activated aggregator * will only return a non empty string call to one call to * tick_asynchronous() on each machine. 
* * If an empty is returned, the asynchronous engine * must ensure that all threads (ncpus per machine) must eventually * call tick_asynchronous_compute(cpuid, key) where key is the return string. */ std::string tick_asynchronous() { // if we fail to acquire the lock, go ahead if (!schedule_lock.try_lock()) return ""; // see if there is a key to run float curtime = timer::approx_time_seconds() - start_time; std::string key; bool has_entry = false; if (!schedule.empty() && -schedule.top().second <= curtime) { key = schedule.top().first; has_entry = true; schedule.pop(); } schedule_lock.unlock(); // no key to run. return false if (has_entry == false) return ""; else return key; // ok. we have a key to run, construct the local reducers } /** * Once tick_asynchronous() returns a key, all threads in the engine * should call tick_asynchronous_compute() with a matching key. * This function will perform the computation for the key in question * and send the accumulated result back to machine 0 when done */ void tick_asynchronous_compute(size_t cpuid, const std::string& key) { // acquire and check the async_aggregator_state typename std::map<std::string, async_aggregator_state>::iterator iter = async_state.find(key); ASSERT_MSG(iter != async_state.end(), "Key %s not found", key.c_str()); ASSERT_GT(iter->second.per_thread_aggregation.size(), cpuid); imap_reduce_base* localmr = iter->second.per_thread_aggregation[cpuid]; // perform the reduction using the local mr if (localmr->is_vertex_map()) { for (int i = cpuid;i < (int)graph.num_local_vertices(); i+=ncpus) { local_vertex_type lvertex = graph.l_vertex(i); if (lvertex.owner() == rmi.procid()) { vertex_type vertex(lvertex); localmr->perform_map_vertex(*context, vertex); } } } else { for (int i = cpuid;i < (int)graph.num_local_vertices(); i+=ncpus) { foreach(local_edge_type e, graph.l_vertex(i).in_edges()) { edge_type edge(e); localmr->perform_map_edge(*context, edge); } } } 
iter->second.root_reducer->add_accumulator(localmr); int countdown_val = iter->second.local_count_down.dec(); ASSERT_LT(countdown_val, ncpus); ASSERT_GE(countdown_val, 0); if (countdown_val == 0) { // reset the async_state to pristine condition. // - clear all thread reducers since we got all we need from them // - clear all the local root reducer except for machine 0 (and after // we read the accumulator from them. // - reset the counters for (size_t i = 0; i < iter->second.per_thread_aggregation.size(); ++i) { iter->second.per_thread_aggregation[i]->clear_accumulator(); } iter->second.local_count_down = ncpus; if (rmi.procid() != 0) { // ok we need to signal back to the the root to perform finalization // read the accumulator any acc = iter->second.root_reducer->get_accumulator(); iter->second.root_reducer->clear_accumulator(); rmi.remote_call(0, &distributed_aggregator::rpc_key_merge, key, acc); } else { decrement_distributed_counter(key); } } } /** * RPC Call called by other machines with their accumulator for the key. * This function will merge the accumulator and perform finalization * when all accumulators are received */ void rpc_key_merge(const std::string& key, any& acc) { // acquire and check the async_aggregator_state typename std::map<std::string, async_aggregator_state>::iterator iter = async_state.find(key); ASSERT_MSG(iter != async_state.end(), "Key %s not found", key.c_str()); iter->second.root_reducer->add_accumulator_any(acc); decrement_distributed_counter(key); } /** * Called whenever one machine finishes all of its local accumulation. * When the counter determines that all machine's accumulators have been * received, this function performs finalization and prepares and * broadcasts the next scheduled time for the key. 
*/ void decrement_distributed_counter(const std::string& key) { // must be master machine ASSERT_EQ(rmi.procid(), 0); // acquire and check the async_aggregator_state typename std::map<std::string, async_aggregator_state>::iterator iter = async_state.find(key); ASSERT_MSG(iter != async_state.end(), "Key %s not found", key.c_str()); int countdown_val = iter->second.distributed_count_down.dec(); logstream(LOG_INFO) << "Distributed Aggregation of " << key << ". " << countdown_val << " remaining." << std::endl; ASSERT_LE(countdown_val, rmi.numprocs()); ASSERT_GE(countdown_val, 0); if (countdown_val == 0) { logstream(LOG_INFO) << "Aggregate completion of " << key << std::endl; any acc_val = iter->second.root_reducer->get_accumulator(); // set distributed count down again for the second phase: // waiting for everyone to finish finalization iter->second.distributed_count_down = rmi.numprocs(); for (procid_t i = 1;i < rmi.numprocs(); ++i) { rmi.remote_call(i, &distributed_aggregator::rpc_perform_finalize, key, acc_val); } iter->second.root_reducer->finalize(*context); iter->second.root_reducer->clear_accumulator(); decrement_finalize_counter(key); } } /** * Called from the root machine to all machines to perform finalization * on the key */ void rpc_perform_finalize(const std::string& key, any& acc_val) { ASSERT_NE(rmi.procid(), 0); typename std::map<std::string, async_aggregator_state>::iterator iter = async_state.find(key); ASSERT_MSG(iter != async_state.end(), "Key %s not found", key.c_str()); iter->second.root_reducer->set_accumulator_any(acc_val); iter->second.root_reducer->finalize(*context); iter->second.root_reducer->clear_accumulator(); // reply to the root machine rmi.remote_call(0, &distributed_aggregator::decrement_finalize_counter, key); } void decrement_finalize_counter(const std::string& key) { typename std::map<std::string, async_aggregator_state>::iterator iter = async_state.find(key); ASSERT_MSG(iter != async_state.end(), "Key %s not found", key.c_str()); 
int countdown_val = iter->second.distributed_count_down.dec(); if (countdown_val == 0) { // done! all finalization is complete. // reset the counter iter->second.distributed_count_down = rmi.numprocs(); // when is the next time we start. // time is as an offset to start_time float next_time = timer::approx_time_seconds() + aggregate_period[key] - start_time; logstream(LOG_INFO) << rmi.procid() << "Reschedule of " << key << " at " << next_time << std::endl; rpc_schedule_key(key, next_time); for (procid_t i = 1;i < rmi.numprocs(); ++i) { rmi.remote_call(i, &distributed_aggregator::rpc_schedule_key, key, next_time); } } } /** * Called to schedule the next trigger time for the key */ void rpc_schedule_key(const std::string& key, float next_time) { schedule_lock.lock(); schedule.push(key, -next_time); schedule_lock.unlock(); } /** * If synchronous aggregation is desired, this function is * To be called simultaneously by one thread on each machine. * This polls the schedule to see if there * is an aggregator which needs to be activated. If there is an aggregator * to be started, this function will perform aggregation. */ void tick_synchronous() { // if timer has exceeded our top key float curtime = timer::approx_time_seconds() - start_time; rmi.broadcast(curtime, rmi.procid() == 0); // note that we do not call approx_time_seconds everytime // this ensures that each key will only be run at most once. // each time tick_synchronous is called. std::vector<std::pair<std::string, float> > next_schedule; while(!schedule.empty() && -schedule.top().second <= curtime) { std::string key = schedule.top().first; aggregate_now(key); schedule.pop(); // when is the next time we start. 
// time is as an offset to start_time float next_time = (timer::approx_time_seconds() + aggregate_period[key] - start_time); rmi.broadcast(next_time, rmi.procid() == 0); next_schedule.push_back(std::make_pair(key, -next_time)); } for (size_t i = 0;i < next_schedule.size(); ++i) { schedule.push(next_schedule[i].first, next_schedule[i].second); } } /** * Must be called on engine stop. Clears the internal scheduler * And resets all incomplete states. */ void stop() { schedule.clear(); // clear the aggregators { typename std::map<std::string, imap_reduce_base*>::iterator iter = aggregators.begin(); while (iter != aggregators.end()) { iter->second->clear_accumulator(); ++iter; } } // clear the asynchronous state { typename std::map<std::string, async_aggregator_state>::iterator iter = async_state.begin(); while (iter != async_state.end()) { delete iter->second.root_reducer; for (size_t i = 0; i < iter->second.per_thread_aggregation.size(); ++i) { delete iter->second.per_thread_aggregation[i]; } iter->second.per_thread_aggregation.clear(); ++iter; } async_state.clear(); } } std::set<std::string> get_all_periodic_keys() const { typename std::map<std::string, float>::const_iterator iter = aggregate_period.begin(); std::set<std::string> ret; while (iter != aggregate_period.end()) { ret.insert(iter->first); ++iter; } return ret; } template <typename ResultType, typename MapFunctionType> ResultType map_reduce_vertices(MapFunctionType mapfunction) { ASSERT_MSG(graph.is_finalized(), "Graph must be finalized"); if (rmi.procid() == 0) { // do a runtime type check test_vertex_mapper_type<ResultType, MapFunctionType>(); } rmi.barrier(); bool global_result_set = false; ResultType global_result = ResultType(); #ifdef _OPENMP #pragma omp parallel #endif { bool result_set = false; ResultType result = ResultType(); #ifdef _OPENMP #pragma omp for #endif for (int i = 0; i < (int)graph.num_local_vertices(); ++i) { if (graph.l_vertex(i).owner() == rmi.procid()) { if (!result_set) { 
          // first mapped vertex initializes the partial sum
          vertex_type vtx(graph.l_vertex(i));
          result = mapfunction(*context, vtx);
          result_set = true;
        } else if (result_set){
          vertex_type vtx(graph.l_vertex(i));
          result += mapfunction(*context, vtx);
        }
      }
    }
#ifdef _OPENMP
#pragma omp critical
#endif
    {
      // fold this thread's partial sum into the machine-wide total
      if (result_set) {
        if (!global_result_set) {
          global_result = result;
          global_result_set = true;
        } else {
          global_result += result;
        }
      }
    }
  }
  // the wrapper's "set" flag lets machines with no mastered vertices
  // participate in the all_reduce without contributing a value
  conditional_addition_wrapper<ResultType>
      wrapper(global_result, global_result_set);
  rmi.all_reduce(wrapper);
  return wrapper.value;
}

/**
 * Maps \c mapfunction over every edge of the graph and sums the results
 * with operator+=, then combines the per-machine sums across the cluster
 * with an all_reduce. Must be called by all machines simultaneously.
 * Enumerating only in-edges of local vertices visits each edge exactly
 * once.
 */
template <typename ResultType, typename MapFunctionType>
ResultType map_reduce_edges(MapFunctionType mapfunction) {
  ASSERT_MSG(graph.is_finalized(), "Graph must be finalized");
  if (rmi.procid() == 0) {
    // do a runtime type check
    test_edge_mapper_type<ResultType, MapFunctionType>();
  }
  rmi.barrier();
  // "set" flags distinguish "no edge mapped yet" from a mapped value
  bool global_result_set = false;
  ResultType global_result = ResultType();
#ifdef _OPENMP
#pragma omp parallel
#endif
  {
    // per-thread partial sum; merged under the critical section below
    bool result_set = false;
    ResultType result = ResultType();
#ifdef _OPENMP
#pragma omp for
#endif
    for (int i = 0; i < (int)graph.num_local_vertices(); ++i) {
      foreach(const local_edge_type& e, graph.l_vertex(i).in_edges()) {
        if (!result_set) {
          // first mapped edge initializes the partial sum
          edge_type edge(e);
          result = mapfunction(*context, edge);
          result_set = true;
        } else if (result_set){
          edge_type edge(e);
          result += mapfunction(*context, edge);
        }
      }
    }
#ifdef _OPENMP
#pragma omp critical
#endif
    {
      // fold this thread's partial sum into the machine-wide total
      if (result_set) {
        if (!global_result_set) {
          global_result = result;
          global_result_set = true;
        } else {
          global_result += result;
        }
      }
    }
  }
  conditional_addition_wrapper<ResultType>
      wrapper(global_result, global_result_set);
  rmi.all_reduce(wrapper);
  return wrapper.value;
}

/**
 * Applies \c transform_functor to every vertex mastered on this machine.
 * Must be called by all machines simultaneously.
 */
template <typename TransformType>
void transform_vertices(TransformType transform_functor) {
  ASSERT_MSG(graph.is_finalized(), "Graph must be finalized");
  rmi.barrier();
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int i = 0; i < (int)graph.num_local_vertices(); ++i) {
    if (graph.l_vertex(i).owner() == rmi.procid()) {
      vertex_type
      vtx(graph.l_vertex(i));
      transform_functor(*context, vtx);
    }
  }
  rmi.barrier();
  // push the modified master vertex data out to all mirror replicas
  graph.synchronize();
}

/**
 * Applies \c transform_functor to every edge of the graph. Must be
 * called by all machines simultaneously. Enumerating only in-edges of
 * local vertices visits each edge exactly once.
 */
template <typename TransformType>
void transform_edges(TransformType transform_functor) {
  ASSERT_MSG(graph.is_finalized(), "Graph must be finalized");
  rmi.barrier();
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int i = 0; i < (int)graph.num_local_vertices(); ++i) {
    foreach(const local_edge_type& e, graph.l_vertex(i).in_edges()) {
      edge_type edge(e);
      transform_functor(*context, edge);
    }
  }
  rmi.barrier();
}

// releases the icontext created by this aggregator
~distributed_aggregator() {
  delete context;
}
}; }; // end of graphlab namespace

#include <graphlab/macros_undef.hpp>
#endif


================================================
FILE: src/graphlab/docs/faq.dox
================================================
/**
\page FAQ FAQ

##I am trying to run GraphLab distributed using files from HDFS as input. However, I am getting screens full of errors

You may need to set the CLASSPATH environment variable. Instead of running:
\verbatim
mpiexec -n N ... graphlab_program ...
\endverbatim
Try running
\verbatim
mpiexec -n N ... env CLASSPATH=`hadoop classpath` graphlab_program ...
\endverbatim

##I am trying to run GraphLab distributed, but it seems to be failing to find my graph input files. I am not using HDFS.

You need to make sure that all machines have access to the graph files at
exactly the same paths. i.e. Either you need to have an NFS file share, or a
distributed file system, or you need to copy all graph files to all machines.
*/


================================================
FILE: src/graphlab/docs/overview.dox
================================================
/**
\defgroup engines GraphLab Engines
\defgroup util GraphLab Utility Classes and Functions
\defgroup rpc GraphLab RPC
\defgroup random Random Number Generators
\defgroup group_serialization Serialization
\defgroup toolkits GraphLab Toolkits
\defgroup httpserver Metrics Reporting Webserver
\defgroup warp Warp System

\mainpage

The GraphLab project started in 2009 to develop a new parallel computation
abstraction tailored to machine learning. GraphLab 1.0 represents our first
shared memory design which, through the addition of several matrix
factorization toolkits, started to grow a community of users.

In the last couple of years, we have focused our development effort on the
distributed environment. Unfortunately, it took nearly a year to figure out
that distributing the GraphLab 1 abstraction was excessively complicated and
was unable to scale up to power-law graphs commonly seen in the real world.
In GraphLab 2.1, we completely redesigned the GraphLab 1 framework for the
distributed environment. The implementation is distributed by design and a
"shared-memory" execution is essentially running a distributed system on a
cluster of 1 machine. And in this new release of GraphLab 2.2, we introduce
the new \ref warp which, through the use of fine-grained user-mode threading,
introduces a new API which brings about a major increase in usability, and
will allow us to provide new capabilities more easily in the future.

There are two starting points where one may begin using GraphLab.

\li \ref toolkits "Toolkits"
You can look up the toolkit documentation here if you have a computation task
which is already implemented by one of our toolkits.

\li \ref using_graphlab "GraphLab C++ Tutorial"
If you have a computation task which is not implemented by our toolkits, you
could try implementing it yourself!
For now a certain degree of C++ knowledge is required.

The new GraphLab 2.2 \ref warp is available for experimentation. A
\ref using_warp tutorial is provided, and we are looking for feedback to
continue extending and improving the Warp system. Performance tuning is also
underway.

Software Stack
=============
\image html software_stack.png
\image html system_overview.png
*/


================================================
FILE: src/graphlab/docs/using.dox
================================================
/**
\page using_graphlab Basic GraphLab Tutorial

In this example, we will implement a simple PageRank application from
scratch, demonstrating all the core GraphLab concepts from loading a graph
to performing computation and saving the results.

The implementation philosophy of the GraphLab API is to expose an MPI-like
SPMD (Single Program Multiple Data) Interface. That is to say, we try to
enforce the illusion that all machines are running the same operations in
lock-step. For instance, a GraphLab program in pseudo code may look like:

\verbatim
main() {
  ...
  Load Graph from file using parsing_function;
  global variable RESULT = map reduce on graph vertices using map_function;
  transform graph vertices using transform_function;
  ...
  create an asynchronous engine and attach it to the graph;
  engine.start();
  save Graph using saver() object;
}
\endverbatim

In the distributed environment, each of these operations is run in lock
step. However, each individual operation may have significant complexity
(perhaps even running asynchronously). To support this illusion requires the
user to implement a number of external functions / classes. For instance, in
the above pseudo-code, the user needs to implement a \c map_function, a
\c transform_function, etc.
While GraphLab's RPC implementation permits the implementation of much more complex computation/communication interleaving behavior, we discourage it and we encourage users to use the aggregate "SPMD"-like operations as much as possible. Indeed, none of the toolkit applications we implemented require any more than these operations. As we understand the abstraction needs of the community better, we can continue to expand on the scope of these operations. The tutorial is divided into the following sections: - \subpage using_graphlab_create_project - \subpage using_graphlab_empty_app - \subpage using_graphlab_distributed_graph - \subpage using_graphlab_distributed_graph_load_data - \subpage using_graphlab_distributed_graph_vertex_program - \subpage using_scheduling - \subpage using_saving_answers - \subpage using_conclusion \page using_graphlab_create_project 1: Creating a GraphLab project To create a GraphLab project, simply create a sub-directory in the graphlab/apps/ folder with your project name. For instance, graphlab/apps/my_first_app. Within the sub-directory, create a text file called CMakeLists.txt with the following contents \verbatim project(My_Project) add_graphlab_executable(my_first_app my_first_app.cpp) \endverbatim The project name "My_Project" is an arbitrary name used to identify your application. <tt>add_graphlab_executable</tt> is a CMake macro that will compile a program called <tt>my_first_app</tt> using the CPP file <tt>my_first_app.cpp</tt>, and linking in all GraphLab libraries and dependencies. If your program needs multiple CPP files simply append to the list. For instance: \verbatim add_graphlab_executable(my_first_app my_first_app.cpp tools.cpp stuff.cpp) \endverbatim will compile and link 3 cpp files together into a single program. For more complex uses of CMake, see the Cmake documentation <a href=http://www.cmake.org/cmake/help/documentation.html> here</a>. 
In the \ref using_graphlab_empty_app "next section", we will implement "Hello World". \page using_graphlab_empty_app 2: Hello World in GraphLab To use GraphLab, \code #include <graphlab.hpp> \endcode All of GraphLab lives in the <code>graphlab</code> namespace. You may use \code using namespace graphlab; \endcode if you wish, but we <a href=http://www.parashift.com/c++-faq-lite/coding-standards.html#faq-27.5> recommend against it</a>. Your main function should begin and end with: \code int main(int argc, char** argv) { graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; ... main body ... graphlab::mpi_tools::finalize(); } \endcode <code>dc</code> is the distributed communication layer which is needed by a number of the core GraphLab objects, whether you are running distributed or not. Place the following code in <tt>my_first_app.cpp</tt>. To create the program run the configure script, than run "make" in the debug/ release/ build folders. The program when executed, will print "Hello World!". \code #include <graphlab.hpp> int main(int argc, char** argv) { graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Hello World!\n"; graphlab::mpi_tools::finalize(); } \endcode \ref graphlab::distributed_control::cout "dc.cout()" provides a wrapper around standard <tt>std::cout</tt>, but wraps it in a way that when used in a distributed environment, only one copy will print, even though all machines execute it. To try that run the following: \verbatim mpiexec -n 4 ./my_first_app \endverbatim This should run 4 instances of "my_first_app" all on the local machine. However, only one "Hello World!" will be printed. \note The \ref graphlab::distributed_control "distributed_control" object is the core RPC implementation and has many other capabilities. See \ref RPC for details. In the \ref using_graphlab_distributed_graph "next section", we will see how to define a distributed graph. 
\page using_graphlab_distributed_graph 3: Defining a Graph The datastructure which surrounds much of GraphLab's computation capabilities is the \ref graphlab::distributed_graph "distributed_graph". The Distributed Graph is a directed graph datastructure comprising of vertices and directed edges, but with no duplicated edges allowed. i.e. there can be only one edge from vertex A to vertex B, and one edge from vertex B to vertex A. An arbitrary user data type can be associated with each vertex and each edge as long as the data type is \ref sec_serializable. \section using_graphlab_distributed_graph_vdata Vertex Data Since we are writing PageRank, we will first we define a struct describing a web page. This will be the contents of the vertex. This struct here holds a name of the webpage, as well as the resultant PageRank. A constructor which assigns a name is provided for later convenience. Observe that we also defined a default constructor as this is \b required for it to be used in the graph. \code struct web_page { std::string pagename; double pagerank; web_page():pagerank(0.0) { } explicit web_page(std::string name):pagename(name),pagerank(0.0){ } }; \endcode To make this \ref sec_serializable, we need to define a \c save and \c load member function. The \c save function simply writes the \c pagename and \c pagerank fields into the output archive object. The \c load function performs the reverse. Care should be made to ensure that the \c save and \c load functions are symmetric. 
\code struct web_page { std::string pagename; double pagerank; web_page():pagerank(0.0) { } explicit web_page(std::string name):pagename(name),pagerank(0.0){ } void save(graphlab::oarchive& oarc) const { oarc << pagename << pagerank; } void load(graphlab::iarchive& iarc) { iarc >> pagename >> pagerank; } }; \endcode \section using_graphlab_distributed_graph_edata Edge Data Since we do not need any information to be stored on the edges of the graph, we will just use the graphlab::empty data type which will ensure that the edge data does not take up any memory. \section using_graphlab_distributed_graph_defining_graph Defining the Graph The graphlab::distributed_graph data type takes two template arguments: \li \c VertexData The type of data to be stored on each vertex \li \c EdgeData The type of data to be stored on each edge For convenience, we define the type of the graph using a typedef: \code typedef graphlab::distributed_graph<web_page, graphlab::empty> graph_type; \endcode \section using_graphlab_distributed_graph_putting_together Putting It Together At this point, our code looks like this: \code #include <string> #include <graphlab.hpp> struct web_page { std::string pagename; double pagerank; web_page():pagerank(0.0) { } explicit web_page(std::string name):pagename(name),pagerank(0.0){ } void save(graphlab::oarchive& oarc) const { oarc << pagename << pagerank; } void load(graphlab::iarchive& iarc) { iarc >> pagename >> pagerank; } }; typedef graphlab::distributed_graph<web_page, graphlab::empty> graph_type; int main(int argc, char** argv) { graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Hello World!\n"; graphlab::mpi_tools::finalize(); } \endcode We have constructed the datatypes required for the graph to operate. In the \ref using_graphlab_distributed_graph_load_data "next section", we will fill out the graph using some synthetic data. 
\page using_graphlab_distributed_graph_load_data 4: Loading Graph Data The distributed_graph requires each vertex to have a numeric ID of type graphlab::vertex_id_type : at a moment a 32-bit integer (this will be lengthened to 64-bits or greater in the near future so you should not depend on it being 32-bits). Vertices do not need to be consecutively numbered. The ID corresponding to <tt>(graphlab::vertex_id_type)(-1)</tt> (or the maximum integer value) is reserved for internal use and should not be assigned. To load graph data from a file, we need to implement a line parser for the distributed_graph's \ref graphlab::distributed_graph::load(std::string path, line_parser_type line_parser) load() function. The \c load() load works in a simple straight-forward way. It assumes that each line in the file is "independent"; i.e. the order in which lines in the file appear do not matter. Each line is then passed into the user provided line-parsing function which then proceeds to add vertices or edges to the graph. For instance, we could describe the following input file for our pagerank task. \verbatim 1 a.com 4 10 4 b.org 10 10 c.edu 11 1 11 d.gov 10 \endverbatim Where each line contains first an ID for the page, then the name of the page, and finally a list of all the IDs the page links to. 
Thus describing the following graph: \image html example_webgraph.gif We can then implement the following line parser: \code bool line_parser(graph_type& graph, const std::string& filename, const std::string& textline) { std::stringstream strm(textline); graphlab::vertex_id_type vid; std::string pagename; // first entry in the line is a vertex ID strm >> vid; strm >> pagename; // insert this web page graph.add_vertex(vid, web_page(pagename)); // while there are elements in the line, continue to read until we fail while(1){ graphlab::vertex_id_type other_vid; strm >> other_vid; if (strm.fail()) return true; graph.add_edge(vid, other_vid); } return true; } \endcode To load this file, we simply construct a graph, \code graph_type graph(dc); graph.load("graph.txt", line_parser); \endcode The key behind the \c load() function is that its actual behavior is to load <b>all files which begin with the name provided</b>. In other words, if the graph file is cut into many smaller pieces such as <tt>graph.txt.1 graph.txt.2, graph.txt.3</tt>, etc, the system will load all the files matching \c graph.txt*, and possibly in parallel (if running in a distributed environment, it is important to ensure that all machines can access the same set of files). Furthermore, the \c load() function automatically supports HDFS loading and obeys the same rules. Finally, if a filename ends with the \c .gz extension, it is automatically treated as a gzip compressed file and will be automatically decompressed for reading. \code graph.load("hdfs:///hdfsnamenode/data/graph", line_parser); \endcode will load all files on the name node \c hdfsnamenode, and matching the pattern \c /data/graph*. Multiple calls may be made to \c load() to load different sets of files: each call may use a different line parser. The only requirement is that each edge and each vertex be added no more than once. 
Once all graph data is loaded, a call to \code graph.finalize(); \endcode is necessary to commit the graph structure. This will reorganize the graph datastructures for optimal run-time access. At this point, your code will look like this: \code #include <string> #include <graphlab.hpp> struct web_page { std::string pagename; double pagerank; web_page():pagerank(0.0) { } explicit web_page(std::string name):pagename(name),pagerank(0.0){ } void save(graphlab::oarchive& oarc) const { oarc << pagename << pagerank; } void load(graphlab::iarchive& iarc) { iarc >> pagename >> pagerank; } }; typedef graphlab::distributed_graph<web_page, graphlab::empty> graph_type; bool line_parser(graph_type& graph, const std::string& filename, const std::string& textline) { std::stringstream strm(textline); graphlab::vertex_id_type vid; std::string pagename; // first entry in the line is a vertex ID strm >> vid; strm >> pagename; // insert this web page graph.add_vertex(vid, web_page(pagename)); // while there are elements in the line, continue to read until we fail while(1){ graphlab::vertex_id_type other_vid; strm >> other_vid; if (strm.fail()) return true; graph.add_edge(vid, other_vid); } return true; } int main(int argc, char** argv) { graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; graph_type graph(dc); graph.load("graph.txt", line_parser); graphlab::mpi_tools::finalize(); } \endcode \note The stringstream is somewhat slow and is not the fastest way to parse a string. Significant performance gains can be made through the use of C parsing or perhaps even boost::spirit. \section load_data_other_topics Other Topics The distributed graph provides several built-in formats which can be used to save/load graph structure. See \ref graphlab::distributed_graph::save_format() "distributed_graph::save_format()" and \ref graphlab::distributed_graph::load_format() "distributed_graph::load_format()" for more details. 
The distributed_graph takes as a second option, a graphlab::graphlab_options datastructure which contains runtime options that can affect the behavior and performance of GraphLab. See \ref graphlab::distributed_graph::distributed_graph() "the constructor" for more details. GraphLab provides a convenient command line parser in graphlab::command_line_options (really, a wrapper around boost::program_options simpler features). The parser is easy to use and automatically exposes GraphLab's runtime options on the command line. \page using_graphlab_distributed_graph_vertex_program 5: Writing the PageRank Vertex Program Finally, we get to writing the PageRank vertex program itself. A detailed description the pagerank update can be found on wikipedia <a href=http://en.wikipedia.org/wiki/PageRank>here</a>. In pseudo-code: \verbatim To compute PageRank of page P: acc = 0; For Each In-page Q: acc += Q.pagerank / Q.num_out_links End P.pagerank = 0.85 * acc + 0.15 \endverbatim We need to map the pagerank pseudo code to the GraphLab vertex program. Note: an alternate way of presenting the same algorithm in terms of probabilities is: \verbatim P.pagerank2 = 0.85 * acc + 0.15 / n \endverbatim where n is the total number of graph nodes. Both formulations are equivalent since one can easily verify that \verbatim P.pagerank2 * n = P.pagerank \endverbatim In GraphLab, we choose the first implementation, since dealing with graphs with billions of nodes will lead to numerical errors and pagerank values which are very close to zero. \section using_graphlab_vertex_program PageRank Vertex Program A "vertex program" can be thought of as a little program which is executed on a vertex in the graph. The vertex program is short lived: it performs the following 3 phases of execution: each phase providing it access to a different section of the neighborhood of the vertex, then is destroyed. 
\li A \b gather phase where \ref graphlab::ivertex_program::gather() "gather()" function in the vertex program is called on each edge on the vertex's adjacent edges. returning a value with each gather. \li An \b apply phase where the values returned by the gather's are summed together and given to the \ref graphlab::ivertex_program::apply() "apply()" function in the vertex program. \li A \b scatter phase where \ref graphlab::ivertex_program::scatter() "scatter()" function in the vertex program is once again called on each edge on the vertex's adjacent edges. See graphlab::ivertex_program for detailed documentation on the behavior of the vertex program. It is simplest to just demonstrate the PageRank vertex program in code: \code class pagerank_program : public graphlab::ivertex_program<graph_type, double>, public graphlab::IS_POD_TYPE { public: // we are going to gather on all the in-edges edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::IN_EDGES; } // for each in-edge gather the weighted sum of the edge. double gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return edge.source().data().pagerank / edge.source().num_out_edges(); } // Use the total rank of adjacent pages to update this page void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { double newval = total * 0.85 + 0.15; vertex.data().pagerank = newval; } // No scatter needed. Return NO_EDGES edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; \endcode The \c pagerank_program inherits from graphlab::ivertex_program which is itself templatized over the type of the graph (\c graph_type) and the type returned by the \c gather() obersion. The pagerank_program must also be \ref sec_serializable. Since this program does not contain any data elements, it is a POD type and is sufficient to inherit from graphlab::IS_POD_TYPE. 
We will now explain each function. ## <b>gather_edges(icontext_type& context, const vertex_type& vertex)</b> The \ref graphlab::ivertex_program::gather_edges() "gather_edges()" function returns the collection of edges to gather. It may return graphlab::IN_EDGES, graphlab::OUT_EDGES, graphlab::NO_EDGES, or graphlab::ALL_EDGES. The PageRank update uses only in pages, thus we return graphlab::IN_EDGES. The gather_edges() function is also passed a \c context object which provides additional access to the execution environment: such as obtaining the number of edges in the graph (\ref graphlab::icontext::num_edges() "num_edges()"), the ability to immediately stop execution (\ref graphlab::icontext::stop() "stop()") among others. It is also passed a reference to the vertex the current vertex_program is executing on through the \c vertex argument. Through \c vertex, the function can read the data on the vertex as well as obtain other meta-data such as the number of in-edges of the current vertex (see graphlab::distributed_graph::vertex_type). ## <b>gather(icontext_type& context, const vertex_type& vertex, edge_type& edge)</b> According to the PageRank equation, we must compute a weighted sum of the in-pages. The \ref graphlab::ivertex_program::gather() "gather()" function is thus executed on each in-edge in of the current vertex, returning the edge's contribution to the "acc" value. To compute the current edge's contribution to the weight, we use the \c edge argument which provides direct access to the data on the edge, the source vertex of the edge, and the destination vertex. (see graphlab::distributed_graph::edge_type) \note While \c gather() technically has a non-const reference to the source and target vertex data through \c edge.source() and \c edge.target(), it should not modify them. The data on the edge (accessible through \c edge.data()) is modifiable however. 
Once the contribution is computed, we simply return it since the result from all \c gather() calls are summed up by the execution engine (using only the += operator). The data type of the accumulation is a \b double, and this must be provided in the second template argument of the ivertex_program the pagerank_program inheritted from. ## <b>apply(icontext_type& context, vertex_type& vertex, const gather_type& total)</b> The returned values from each gather are implicitly summed up (in parallel) behind the scenes as passed to the apply() function in the \c total argument. Observe that now the \c vertex parameter is modifiable, and we use this to write the new pagerank to the current vertex. ## <b>scatter_edges(icontext_type& context, const vertex_type& vertex)</b> Scatter is similar to gather: it is executed on each edge of the vertex, but does not accumulate any values. Since the pagerank computation does not require a scatter operation we simply have scatter_edges return graphlab::NO_EDGES. \section using_graphlab_vertex_program_running Running the Vertex Program To run the above vertex program on all vertices in the graph \b once, we simply construct an engine in main() (after finalizing the graph) \code graphlab::omni_engine<pagerank_program> engine(dc, graph, "sync"); engine.signal_all(); engine.start(); \endcode The \ref graphlab::omni_engine "omni_engine" is a engine wrapper that allows you to easily select between a synchronous engine or an asynchronous engine. In this case, we select a synchronous engine. Passing "async" will select an asynchronous engine. \note If graphlab::command_line_options are used, it can be passed as an additional 4th argument to the constructor. This will allow the engine type to be modified at runtime. The \ref graphlab::omni_engine::signal_all "signal_all()" function, as the name suggests, signals all the vertices in the graph to run. \ref graphlab::omni_engine::start() "start()" will begin execution of all signaled vertices. 
Since the synchronous engine is selected all vertices will perform the gather/apply/scatter operations in lock-step. If the asynchronous engine is selected, the vertices will run asynchronously, but a distributed locking procedure is used internally to ensure data consistency. Each vertex in the graph will run exactly once. We could of course embed the signal+start operations in a loop to run multiple rounds, but that would be inefficient. In the \ref using_scheduling "next section" we will learn how to signal vertices inside a vertex program. \page using_scheduling 6: Runtime Scheduling During engine execution, the engine maintains (in a distributed fashion), a schedule of vertex programs to run. The \ref graphlab::omni_engine::signal_all "engine.signal_all()" function call essentially injects a list of all vertices in the graph into the engine's scheduler. In this section, we will see how the schedule can be modified during engine execution and how that could be used (to great effect in some cases) to accelerate convergence of your program. \section using_scheduling_self Self Scheduling Example The simplest form of "dynamic" scheduling is to repeat each vertex's execution for a certain fixed number of iterations (say 10). To do that, we add a "counter" to the data on each vertex by modifying the \c web_page struct: \code struct web_page { std::string pagename; double pagerank; int counter; web_page():pagerank(0.0),counter(0) { } explicit web_page(std::string name):pagename(name), pagerank(0.0),counter(0){ } void save(graphlab::oarchive& oarc) const { oarc << pagename << pagerank << counter; } void load(graphlab::iarchive& iarc) { iarc >> pagename >> pagerank >> counter; } }; \endcode Observe that the constructors were modified to initialize the counters at 0, and the save/load functions must now also save the counter variable. 
To achieve self-scheduling, we simply modify the \c apply() function in the \c pagerank_program to increment the counter, and signal the current vertex if the counter has not reached 10. \code // Use the total rank of adjacent pages to update this page void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { double newval = total * 0.85 + 0.15; vertex.data().pagerank = newval; ++vertex.data().counter; if (vertex.data().counter < 10) context.signal(vertex); // of course, instead of simply "10" here, this could be comparing // against a global variable set by a command line option. } \endcode The \ref graphlab::icontext::signal() "context.signal(vertex)" call inserts the current vertex into the scheduler. The guarantee provided by the \c signal() call is that: <b> The vertex signaled will be eventually executed some time after completion of the \c signal() function call. </b> If used together with the synchronous engine, this program will perform exactly the equivalent of the traditional "matrix-multiplication-like" PageRank iteration. \section using_scheduling_dynamic Dynamic Scheduling Example Alternatively, we could take a more "contextual" approach to scheduling. Considering that PageRank is a numeric procedure performed on a large graph, it is not unreasonable to believe that some parts of the graph will converge before other parts of the graph. We could therefore save computation if we only recompute vertices which may change by large amounts. To implement this, we consider the pagerank_program implemented earlier. We will not make modifications to the gather phases, but we will change the apply phase and introduce a scatter phase. The goal is to achieve the following: \li If the current vertex's PageRank does not change much, no additional action is performed. \li However, if the current vertex's PageRank changed by greater than some threshold (1E-3), we will \c signal() all out-pages to recompute their PageRank value. 
\code class dynamic_pagerank_program : public graphlab::ivertex_program<graph_type, double>, public graphlab::IS_POD_TYPE { private: // a variable local to this program bool perform_scatter; public: // no changes to gather_edges and gather edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::IN_EDGES; } double gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return edge.source().data().pagerank / edge.source().num_out_edges(); } // Use the total rank of adjacent pages to update this page void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { double newval = total * 0.85 + 0.15; double prevval = vertex.data().pagerank; vertex.data().pagerank = newval; perform_scatter = (std::fabs(prevval - newval) > 1E-3); } // The scatter edges depend on whether the pagerank has converged edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { if (perform_scatter) return graphlab::OUT_EDGES; else return graphlab::NO_EDGES; } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { context.signal(edge.target()); } }; \endcode Firstly, we observe that we introduced a private variable \c perform_scatter to the program. This variable is short-lived and is local to this particular execution of the pagerank program. In the \c apply() function, we compute the change to the current vertex's pagerank, and if it is above a certain threshold, we set the \c perform_scatter variable to true. This next influences the behavior of the \c scatter_edges() function. If \c perform_scatter is false, (i.e. insufficient change was made to the current PageRank), we do not perform a scatter. However, if sufficient change was made, \c scatter_edges() will return graphlab::OUT_EDGES which will cause the \c scatter() function to be executed on all out-going edges of the current vertex. 
The \c scatter() function then simply schedules/signals the destination vertex, requesting it to be executed in the future, picking up the large change made to the current vertex. If ran using the synchronous engine, you will observe that the time spent within each synchronous iteration decreases, as the number of "signalled" vertices in each iteration decreases over time. This also works well in the asynchronous setting where powerful dynamic schedulers are used to control the order of execution. \note If you dig further into the documentation you will see that the signalling operation can itself be used as a messaging primitive to carry a message to a destination vertex. This allows GraphLab v2.1 to in some sense, also include "Pregel" as part of the implementation. Furthermore, the message could define a "message priority" which can be used in conjunction with the priority-queue based dynamic schedulers to obtain greater control over the order of execution in the asynchronous engine. In the \ref using_saving_answers "next section", we will see how to save output of the system. \page using_saving_answers 7: Saving Results Saving the graph requires us to implement a graph writer class comprising of two functions: \c save_vertex() and \c save_edge(). \code class graph_writer { public: std::string save_vertex(graph_type::vertex_type v) { return ""; } std::string save_edge(graph_type::edge_type e) { return ""; } }; \endcode The \c save_vertex() and \c save_edge() functions are respectively called on each vertex/edge in the graph. These functions return a string which is then directly written to the output file. For instance, to save an output file comprising of <tt>[webpage] [pagerank]</tt> lines, we may implement the following: \code class graph_writer { public: std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; // remember the \n at the end! This will provide a line break // after each page. 
strm << v.data().pagename << "\t" << v.data().pagerank << "\n";
    return strm.str();
  }
  std::string save_edge(graph_type::edge_type e) { return ""; }
};
\endcode
Since we are not interested in the edges, the \c save_edge() function simply
returns an empty string.

\note The stringstream is somewhat slow and is not the fastest way to write
a string. Performance gains can be made through the use of C string
operations.

Then to write the graph, we will call
\code
graph.save("output",
           graph_writer(),
           false, // set to true if each output file is to be gzipped
           true,  // whether vertices are saved
           false); // whether edges are saved
\endcode
This will save a sequence of files named <tt> output_1_of_N, output_2_of_N
... </tt> where N is some integer. Concatenating all the files together will
produce the combined output. If the gzip option is set, each of the files
will have a \c .gz suffix and \c gunzip must be used to decompress the file
for reading.

If the output path is located on HDFS, for instance:
\verbatim
hdfs:///namenode/data/output
\endverbatim
The result will be saved to the HDFS cluster with the given namenode, in the
subdirectory /data with the filenames <tt> output_1_of_N, output_2_of_N ...
</tt>.

There are several other "built-in" saving formats which can be accessed
through the \ref graphlab::distributed_graph::save_format()
"graph.save_format()" function.

The \ref using_conclusion "next section" is a brief conclusion.

\page using_conclusion 8: Conclusion

This completes the core of the GraphLab tutorial. We have gone through an
overview of
\li How to start a GraphLab project
\li How to read a graph from disk/HDFS
\li How to write a vertex program
\li Dynamic Scheduling in a vertex program
\li How to save a graph to disk/HDFS

There are many more features which we are unable to introduce through the
course of this tutorial which we hope you will be able to discover by
exploring the documentation.
Some really useful tools that we would like to bring to your attention are: <ul> <li> Perform MapReduce over the vertices or the edges in the graph. <ul> <li> graphlab::distributed_graph::map_reduce_vertices() <li> graphlab::distributed_graph::map_reduce_edges() </ul> <li> Perform MapReduce over the vertices or edges in the graph, while being provided a context in the Map function, thus allowing finer grained control over signalling. <ul> <li> graphlab::iengine::map_reduce_vertices() <li> graphlab::iengine::map_reduce_edges() </ul> <li> Make a modification to all the vertices or edges in the graph <ul> <li> graphlab::distributed_graph::transform_vertices() <li> graphlab::distributed_graph::transform_edges() </ul> <li> Make a modification to all the vertices or edges in the graph, while being provided a context in the Map function, thus allowing finer grained control over signalling. <ul> <li> graphlab::iengine::transform_vertices() <li> graphlab::iengine::transform_edges() </ul> <li> Register a MapReduce operation which performs periodically while a GraphLab engine is running thus allowing for global state. <ul> <li> graphlab::iengine::add_vertex_aggregator() <li> graphlab::iengine::add_edge_aggregator() <li> graphlab::iengine::aggregate_now() <li> graphlab::iengine::aggregate_periodic() </ul> </ul> Interleaving these operations together with GraphLab vertex_programs allow for a huge amount of flexibility, allowing for a large number of algorithms to be implemented easily, and efficiently. */ ================================================ FILE: src/graphlab/docs/using_warp.dox ================================================ /** \page using_warp GraphLab Warp System Tutorial This tutorial demonstrate the latest feature of GraphLab -- the Warp System. We encourage you to complete the basic graphlab tutorial before advancing. 
The design of the warp system aims to improve the programming interface of GraphLab by simplyfing the procedure of writing the vertex program without sacrificing performance. The basic design of the Warp system lies in use of fine-grained user-mode threading to hide communication latency of blocking calls; and as such expose a more intuitive and easy to use API Interface. We begin with a simple synthetic example. Say, we have a graph with an integer on each vertex, and we would like to compute on each vertex, the total value of its neighbors. \code struct vertex_data { int value; int neighbor_total; }; typedef graphlab::distributed_graph<vertex_data, empty> graph_type; \endcode The Warp System is included by including the single header \code #include <graphlab/warp.hpp> \endcode One of the key functions the warp engine provides, is a parfor over all vertices, excuting a single function on all vertices. \code // Runs the compute_neighborhood_total function on all vertices in the graph graphlab::warp::parfor_all_vertices(graph, compute_neighborhood_total); void compute_neighborhood_total(graph_type::vertex_type vertex) { ... } \endcode Now, within the compute_neighborhood_total function, we would like to compute the sum of the "value" field of all neighboring vertices, and assign it to the "neighbor_total" value of the current vertex. However, the parfor_all_vertices function requires that compute_neighborhood_total only takes a single argument: the vertex, so how do we get the value of the neighbors? 
We use a call to a warp function called warp::map_reduce_neighborhood(),
which allows us to compute an aggregation over the neighborhood of the graph
\code
int gather_value(graph_type::edge_type edge,
                 graph_type::vertex_type other /* the other vertex*/ ) {
  return other.data().value;
}

void combine(int& a, const int& b) {
  a += b;
}

void compute_neighborhood_total(graph_type::vertex_type vertex) {
  vertex.data().neighbor_total =
      graphlab::warp::map_reduce_neighborhood(vertex,
                                              ALL_EDGES,
                                              gather_value,
                                              combine);
}
\endcode
The warp::map_reduce_neighborhood() function calls the gather_value function
on all adjacent edges of the graph. The return value of the gather_value
function is then combined using the combine function, and the result
returned. Here, we explicitly defined a combine function, but there is a
default combiner provided which simply uses +=. As such, in this case, the
combiner is in fact optional, and the following will work just fine:
\code
void compute_neighborhood_total(graph_type::vertex_type vertex) {
  vertex.data().neighbor_total =
      graphlab::warp::map_reduce_neighborhood(vertex,
                                              ALL_EDGES,
                                              gather_value);
}
\endcode

The trick to the Warp System is that in the distributed setting,
warp::map_reduce_neighborhood() can be a distributed call. However, through
the use of fine-grained threading (fibers), we can hide the cost of the
distributed latency by creating thousands of fibers to evaluate the parfor.

In addition to the warp::parfor_all_vertices() function and the
warp::map_reduce_neighborhood() function we demonstrated, the entire Warp
system comprises an asynchronous \ref graphlab::warp_engine "warp engine",
which operates similarly to the asynchronous engine, but allows you to
implement an arbitrary function rather than the restricted
Gather-Apply-Scatter model, and 4 basic functions.

- \ref graphlab::warp::parfor_all_vertices() "warp::parfor_all_vertices()"
provides a simple parallel for loop over all vertices in the graph, or in a
given set of vertices.
- \ref graphlab::warp::map_reduce_neighborhood() "warp::map_reduce_neighborhood()"
allows a map-reduce aggregation of the neighborhood of a vertex to be
performed.
- \ref graphlab::warp::transform_neighborhood() "warp::transform_neighborhood()"
allows a parallel transformation of the neighborhood of a vertex to be
performed.
- \ref graphlab::warp::broadcast_neighborhood() "warp::broadcast_neighborhood()"
allows a parallel transformation of the neighborhood of a vertex to be
performed and also provides a warp_engine context.

In this example, we demonstrate how the same PageRank application can be
implemented using the Warp System in two ways: using
warp::parfor_all_vertices(), and using the Warp Engine.
See \ref warp for detailed documentation on the behavior of the Warp System.

- \subpage using_warp_graph_functions
- \subpage using_warp_graph_vertex_program

\page using_warp_graph_functions PageRank with Warp Parfor

The warp functions provide great flexibility to write simple parallel
functions on graphs. For example, a PageRank program which computes
asynchronous sweeps over all vertices can be written using only warp
functions. Given a graph with a float on each vertex:
\code
typedef graphlab::distributed_graph<float, graphlab::empty> graph_type;
\endcode
We use the parfor_all_vertices() function to run a pagerank function on all
vertices
\code
int main(int argc, char** argv) {
  ...
  for (int i = 0; i < NUM_ITER; ++i) {
    // runs the pagerank function on all the vertices in the graph.
    graphlab::warp::parfor_all_vertices(graph, pagerank);
  }
  ...
}

void pagerank(graph_type::vertex_type vertex) {
 ...
}
\endcode
The pagerank function then simply has to use a
warp::map_reduce_neighborhood() call to compute the weighted sum of the
neighborhood's PageRank value. Note that here, we use the default combiner
(a += operation) to merge the results from the pagerank_map function.
\code
float pagerank_map(graph_type::edge_type edge,
                   graph_type::vertex_type other) {
  return other.data() / other.num_out_edges();
}

void pagerank(graph_type::vertex_type vertex) {
  // computes an aggregate over the neighborhood using map_reduce_neighborhood
  vertex.data() = 0.15 + 0.85 *
      graphlab::warp::map_reduce_neighborhood(vertex,
                                              IN_EDGES,
                                              pagerank_map);
}
\endcode
Using a C++11 lambda, we can further simplify the pagerank function.
\code
void pagerank(graph_type::vertex_type vertex) {
  // computes an aggregate over the neighborhood using map_reduce_neighborhood
  vertex.data() = 0.15 + 0.85 *
      graphlab::warp::map_reduce_neighborhood(vertex,
                                              IN_EDGES,
                                              [](graph_type::edge_type edge,
                                                 graph_type::vertex_type other) {
                                                return other.data() / other.num_out_edges();
                                              });
}
\endcode
It is important that the C++11 lambda be convertible to a regular function
pointer; it must not capture a closure (i.e. you cannot use [=] or [&]).

If additional parameters must be passed to the mapper or combiner functions,
an optional "extra argument" can be specified.
\code
float pagerank_map(graph_type::edge_type edge,
                   graph_type::vertex_type other,
                   const float& weight) {
  return weight * other.data() / other.num_out_edges();
}

void combiner(float& a, const float& b, const float& other) {
  a += b;
}

void pagerank(graph_type::vertex_type vertex) {
  // computes an aggregate over the neighborhood using map_reduce_neighborhood
  vertex.data() = 0.15 +
      graphlab::warp::map_reduce_neighborhood(vertex,
                                              IN_EDGES,
                                              float(0.85), // this argument will show up as
                                                           // the 3rd argument in pagerank_map
                                              pagerank_map,
                                              combiner);
}
\endcode

\page using_warp_graph_vertex_program PageRank with the Warp Engine

We use the Warp system to provide a dynamic asynchronous engine, similar in
nature to the \ref asynchronous_engine "asynchronous engine", but without
the Gather-Apply-Scatter limitations. Instead you simply specify an update
function.
Which is of the type \code void update_function(engine_type::context& context, graph_type::vertex_type vertex) { } \endcode Within the update function, All blocking warp functions such as warp::map_reduce_neighborhood(), warp::transform_neighborhood() and warp::broadcast_neighborhood() can be used to make changes to the graph data, and to schedule other vertices for computation. \section using_warp_graph_vertex_program_updatefn Pagerank Update Function Given a graph with a float on each vertex: \code typedef graphlab::distributed_graph<float, graphlab::empty> graph_type; \endcode we first define the engine type \code typedef graphlab::warp::warp_engine<graph_type> engine_type; \endcode Now PageRank can be written using the Warp Engine, by defining an update function: \code float pagerank_map(graph_type::edge_type edge, graph_type::vertex_type other) { return other.data() / other.num_out_edges(); } void signal_neighbor(engine_type::context& context, graph_type::edge_type edge, graph_type::vertex_type other) { context.signal(other); } void pagerank_update_function(engine_type::context& context, graph_type::vertex_type vertex) { // save the old pagerank value float oldval = vertex.data(); // compute the new pagerank using blocking warp function vertex.data() = 0.15 + 0.85 *graphlab::warp::map_reduce_neighborhood(vertex, IN_EDGES, pagerank_map); // Schedule out edges if we exceed tolerance. if (std::fabs(oldval - vertex.data()) > TOLERANCE) { graphlab::warp::broadcast_neighborhood(context, vertex, OUT_EDGES, signal_neighbor); } } \endcode The broadcast_neighborhood call simply runs the provided function (signal_neighbor), on all selected edges (OUT_EDGES) in this case. the warp::broadcast_neighborhood() function requires the context to allow the signal_neighbor function to perform dynamic scheduling. 
warp::map_reduce_neighborhood() essentially accomplishes the same as the
"Gather" function in the GAS model, and warp::broadcast_neighborhood()
essentially accomplishes the same role as the Scatter, allowing you to make
modifications to the edges on the graph.

As you can see, the warp engine lets you write a much simpler vertex update
function compared to the vertex_program used in previous versions of
engines.

\section using_warp_graph_vertex_program_running Running the Update Function

To run the above vertex program on all vertices in the graph \b once, we
simply construct an engine in main() (after finalizing the graph)
\code
int main(int argc, char** argv) {
  ...
  graphlab::warp::warp_engine<graph_type> engine(dc, graph);
  // sets the update function to use
  engine.set_update_function(pagerank_update_function);
  // signals all vertices to run. warp::warp_engine::signal_vset()
  // can also be used to signal a subset of vertices
  engine.signal_all();
  // run the engine until scheduler is empty.
  engine.start();
  ...
}
\endcode

Just like the asynchronous engine, there are numerous options for the
scheduler type, and scheduler capabilities, which we will not go into great
detail here.
*/



================================================
FILE: src/graphlab/engine/CMakeLists.txt
================================================
project(GraphLab)
# subdirs(callback)



================================================
FILE: src/graphlab/engine/async_consistent_engine.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_ASYNC_CONSISTENT_ENGINE #define GRAPHLAB_ASYNC_CONSISTENT_ENGINE #include <deque> #include <boost/bind.hpp> #include <graphlab/scheduler/ischeduler.hpp> #include <graphlab/scheduler/scheduler_factory.hpp> #include <graphlab/scheduler/get_message_priority.hpp> #include <graphlab/vertex_program/ivertex_program.hpp> #include <graphlab/vertex_program/icontext.hpp> #include <graphlab/vertex_program/context.hpp> #include <graphlab/engine/iengine.hpp> #include <graphlab/engine/execution_status.hpp> #include <graphlab/options/graphlab_options.hpp> #include <graphlab/rpc/dc_dist_object.hpp> #include <graphlab/engine/distributed_chandy_misra.hpp> #include <graphlab/engine/message_array.hpp> #include <graphlab/util/tracepoint.hpp> #include <graphlab/util/memory_info.hpp> #include <graphlab/util/generics/conditional_addition_wrapper.hpp> #include <graphlab/rpc/distributed_event_log.hpp> #include <graphlab/parallel/fiber_group.hpp> #include <graphlab/parallel/fiber_control.hpp> #include <graphlab/rpc/fiber_async_consensus.hpp> #include <graphlab/aggregation/distributed_aggregator.hpp> #include <graphlab/parallel/fiber_remote_request.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { /** * \ingroup engines * * \brief The asynchronous consistent engine executed vertex programs * asynchronously and can ensure mutual exclusion such that adjacent vertices * are never executed simultaneously. 
The default mode is "factorized" * consistency in which only individual gathers/applys/ * scatters are guaranteed to be consistent, but this can be strengthened to * provide full mutual exclusion. * * * \tparam VertexProgram * The user defined vertex program type which should implement the * \ref graphlab::ivertex_program interface. * * ### Execution Semantics * * On start() the \ref graphlab::ivertex_program::init function is invoked * on all vertex programs in parallel to initialize the vertex program, * vertex data, and possibly signal vertices. * * After which, the engine spawns a collection of threads where each thread * individually performs the following tasks: * \li Extract a message from the scheduler. * \li Perform distributed lock acquisition on the vertex which is supposed * to receive the message. The lock system enforces that no neighboring * vertex is executing at the same time. The implementation is based * on the Chandy-Misra solution to the dining philosophers problem. * (Chandy, K.M.; Misra, J. (1984). The Drinking Philosophers Problem. * ACM Trans. Program. Lang. Syst) * \li Once lock acquisition is complete, * \ref graphlab::ivertex_program::init is called on the vertex * program. As an optimization, any messages sent to this vertex * before completion of lock acquisition is merged into original message * extracted from the scheduler. * \li Execute the gather on the vertex program by invoking * the user defined \ref graphlab::ivertex_program::gather function * on the edge direction returned by the * \ref graphlab::ivertex_program::gather_edges function. The gather * functions can modify edge data but cannot modify the vertex * program or vertex data and can be executed on multiple * edges in parallel. * * \li Execute the apply function on the vertex-program by * invoking the user defined \ref graphlab::ivertex_program::apply * function passing the sum of the gather functions. 
If \ref * graphlab::ivertex_program::gather_edges returns no edges then * the default gather value is passed to apply. The apply function * can modify the vertex program and vertex data. * \li Execute the scatter on the vertex program by invoking * the user defined \ref graphlab::ivertex_program::scatter function * on the edge direction returned by the * \ref graphlab::ivertex_program::scatter_edges function. The scatter * functions can modify edge data but cannot modify the vertex * program or vertex data and can be executed on multiple * edges in parallel. * \li Release all locks acquired in the lock acquisition stage, * and repeat until the scheduler is empty. * * The engine threads multiplexes the above procedure through a secondary * internal queue, allowing an arbitrary large number of vertices to * begin processing at the same time. * * ### Construction * * The asynchronous consistent engine is constructed by passing in a * \ref graphlab::distributed_control object which manages coordination * between engine threads and a \ref graphlab::distributed_graph object * which is the graph on which the engine should be run. The graph should * already be populated and cannot change after the engine is constructed. * In the distributed setting all program instances (running on each machine) * should construct an instance of the engine at the same time. * * Computation is initiated by signaling vertices using either * \ref graphlab::async_consistent_engine::signal or * \ref graphlab::async_consistent_engine::signal_all. In either case all * machines should invoke signal or signal all at the same time. Finally, * computation is initiated by calling the * \ref graphlab::async_consistent_engine::start function. 
* * ### Example Usage * * The following is a simple example demonstrating how to use the engine: * \code * #include <graphlab.hpp> * * struct vertex_data { * // code * }; * struct edge_data { * // code * }; * typedef graphlab::distributed_graph<vertex_data, edge_data> graph_type; * typedef float gather_type; * struct pagerank_vprog : * public graphlab::ivertex_program<graph_type, gather_type> { * // code * }; * * int main(int argc, char** argv) { * // Initialize control plain using mpi * graphlab::mpi_tools::init(argc, argv); * graphlab::distributed_control dc; * // Parse command line options * graphlab::command_line_options clopts("PageRank algorithm."); * std::string graph_dir; * clopts.attach_option("graph", &graph_dir, graph_dir, * "The graph file."); * if(!clopts.parse(argc, argv)) { * std::cout << "Error in parsing arguments." << std::endl; * return EXIT_FAILURE; * } * graph_type graph(dc, clopts); * graph.load_structure(graph_dir, "tsv"); * graph.finalize(); * std::cout << "#vertices: " << graph.num_vertices() * << " #edges:" << graph.num_edges() << std::endl; * graphlab::async_consistent_engine<pagerank_vprog> engine(dc, graph, clopts); * engine.signal_all(); * engine.start(); * std::cout << "Runtime: " << engine.elapsed_seconds(); * graphlab::mpi_tools::finalize(); * } * \endcode * * \see graphlab::omni_engine * \see graphlab::synchronous_engine * * <a name=engineopts>Engine Options</a> * ========================= * The asynchronous engine supports several engine options which can * be set as command line arguments using \c --engine_opts : * * \li \b timeout (default: infinity) Maximum time in seconds the engine will * run for. The actual runtime may be marginally greater as the engine * waits for all threads and processes to flush all active tasks before * returning. 
* \li \b factorized (default: true) Set to true to weaken the consistency * model to factorized consistency where only individual gather/apply/scatter * calls are guaranteed to be locally consistent. Can produce massive * increases in throughput at a consistency penalty. * \li \b nfibers (default: 10000) Number of fibers to use * \li \b stacksize (default: 16384) Stacksize of each fiber. */ template<typename VertexProgram> class async_consistent_engine: public iengine<VertexProgram> { public: /** * \brief The user defined vertex program type. Equivalent to the * VertexProgram template argument. * * The user defined vertex program type which should implement the * \ref graphlab::ivertex_program interface. */ typedef VertexProgram vertex_program_type; /** * \brief The user defined type returned by the gather function. * * The gather type is defined in the \ref graphlab::ivertex_program * interface and is the value returned by the * \ref graphlab::ivertex_program::gather function. The * gather type must have an <code>operator+=(const gather_type& * other)</code> function and must be \ref sec_serializable. */ typedef typename VertexProgram::gather_type gather_type; /** * \brief The user defined message type used to signal neighboring * vertex programs. * * The message type is defined in the \ref graphlab::ivertex_program * interface and used in the call to \ref graphlab::icontext::signal. * The message type must have an * <code>operator+=(const gather_type& other)</code> function and * must be \ref sec_serializable. */ typedef typename VertexProgram::message_type message_type; /** * \brief The type of data associated with each vertex in the graph * * The vertex data type must be \ref sec_serializable. */ typedef typename VertexProgram::vertex_data_type vertex_data_type; /** * \brief The type of data associated with each edge in the graph * * The edge data type must be \ref sec_serializable. 
*/ typedef typename VertexProgram::edge_data_type edge_data_type; /** * \brief The type of graph supported by this vertex program * * See graphlab::distributed_graph */ typedef typename VertexProgram::graph_type graph_type; /** * \brief The type used to represent a vertex in the graph. * See \ref graphlab::distributed_graph::vertex_type for details * * The vertex type contains the function * \ref graphlab::distributed_graph::vertex_type::data which * returns a reference to the vertex data as well as other functions * like \ref graphlab::distributed_graph::vertex_type::num_in_edges * which returns the number of in edges. * */ typedef typename graph_type::vertex_type vertex_type; /** * \brief The type used to represent an edge in the graph. * See \ref graphlab::distributed_graph::edge_type for details. * * The edge type contains the function * \ref graphlab::distributed_graph::edge_type::data which returns a * reference to the edge data. In addition the edge type contains * the function \ref graphlab::distributed_graph::edge_type::source and * \ref graphlab::distributed_graph::edge_type::target. * */ typedef typename graph_type::edge_type edge_type; /** * \brief The type of the callback interface passed by the engine to vertex * programs. See \ref graphlab::icontext for details. * * The context callback is passed to the vertex program functions and is * used to signal other vertices, get the current iteration, and access * information about the engine. */ typedef icontext<graph_type, gather_type, message_type> icontext_type; private: /// \internal \brief The base type of all schedulers message_array<message_type> messages; /** \internal * \brief The true type of the callback context interface which * implements icontext. 
\see graphlab::icontext graphlab::context */ typedef context<async_consistent_engine> context_type; // context needs access to internal functions friend class context<async_consistent_engine>; /// \internal \brief The type used to refer to vertices in the local graph typedef typename graph_type::local_vertex_type local_vertex_type; /// \internal \brief The type used to refer to edges in the local graph typedef typename graph_type::local_edge_type local_edge_type; /// \internal \brief The type used to refer to vertex IDs in the local graph typedef typename graph_type::lvid_type lvid_type; /// \internal \brief The type of the current engine instantiation typedef async_consistent_engine<VertexProgram> engine_type; typedef conditional_addition_wrapper<gather_type> conditional_gather_type; /// The RPC interface dc_dist_object<async_consistent_engine<VertexProgram> > rmi; /// A reference to the active graph graph_type& graph; /// A pointer to the lock implementation distributed_chandy_misra<graph_type>* cmlocks; /// Per vertex data locks std::vector<simple_spinlock> vertexlocks; /// Total update function completion time std::vector<double> total_completion_time; /** * \brief This optional vector contains caches of previous gather * contributions for each machine. * * Caching is done locally and therefore a high-degree vertex may * have multiple caches (one per machine). */ std::vector<gather_type> gather_cache; /** * \brief A bit indicating if the local gather for that vertex is * available. */ dense_bitset has_cache; bool use_cache; /// Engine threads. fiber_group thrgroup; //! 
The scheduler ischeduler* scheduler_ptr; typedef typename iengine<VertexProgram>::aggregator_type aggregator_type; aggregator_type aggregator; /// Number of kernel threads size_t ncpus; /// Size of each fiber stack size_t stacksize; /// Number of fibers size_t nfibers; /// set to true if engine is started bool started; bool track_task_time; /// A pointer to the distributed consensus object fiber_async_consensus* consensus; /** * Used only by the locking subsystem. * to allow the fiber to go to sleep when waiting for the locks to * be ready. */ struct vertex_fiber_cm_handle { mutex lock; bool philosopher_ready; size_t fiber_handle; }; std::vector<vertex_fiber_cm_handle*> cm_handles; dense_bitset program_running; dense_bitset hasnext; // Various counters. atomic<uint64_t> programs_executed; timer launch_timer; /// Defaults to (-1), defines a timeout size_t timed_termination; /// engine option. Sets to true if factorized consistency is used bool factorized_consistency; bool endgame_mode; /// Time when engine is started float engine_start_time; /// True when a force stop is triggered (possibly via a timeout) bool force_stop; graphlab_options opts_copy; // local copy of options to pass to // scheduler construction execution_status::status_enum termination_reason; std::vector<mutex> aggregation_lock; std::vector<std::deque<std::string> > aggregation_queue; public: /** * Constructs an asynchronous consistent distributed engine. * The number of threads to create are read from * \ref graphlab_options::get_ncpus "opts.get_ncpus()". The scheduler to * construct is read from * \ref graphlab_options::get_scheduler_type() "opts.get_scheduler_type()". * The default scheduler * is the queued_fifo scheduler. For details on the scheduler types * \see scheduler_types * * See the <a href=#engineopts> main class documentation</a> for the * available engine options. * * \param dc Distributed controller to associate with * \param graph The graph to schedule over. 
The graph must be fully
   * constructed and finalized.
   * \param opts A graphlab::graphlab_options object containing options and
   *             parameters for the scheduler and the engine.
   */
  async_consistent_engine(distributed_control &dc,
                          graph_type& graph,
                          const graphlab_options& opts = graphlab_options()) :
      rmi(dc, this), graph(graph), scheduler_ptr(NULL),
      aggregator(dc, graph, new context_type(*this, graph)), started(false),
      engine_start_time(timer::approx_time_seconds()), force_stop(false) {
    rmi.barrier();
    // engine parameter defaults; any of these may be overridden by
    // set_options() below
    nfibers = 10000;
    stacksize = 16384;
    use_cache = false;
    factorized_consistency = true;
    track_task_time = false;
    // endgame_mode is consulted by internal_signal() once the engine is
    // started; give it a defined value instead of reading uninitialized
    // memory if a signal races with start()
    endgame_mode = false;
    timed_termination = (size_t)(-1);
    termination_reason = execution_status::UNSET;
    set_options(opts);
    // one completion-time accumulator per fiber worker thread
    total_completion_time.resize(fiber_control::get_instance().num_workers());
    // initialize all internal scheduling datastructures exactly once.
    // (Previously init() was invoked both before and after the resize
    // above, redundantly re-resizing every per-vertex structure.)
    init();
    rmi.barrier();
  }

 private:

  /**
   * \internal
   * Configures the engine with the provided options.
   * The number of threads to create are read from
   * opts::get_ncpus(). The scheduler to construct is read from
   * graphlab_options::get_scheduler_type(). The default scheduler
   * is the queued_fifo scheduler.
For details on the scheduler types
   * \see scheduler_types
   */
  void set_options(const graphlab_options& opts) {
    rmi.barrier();
    ncpus = opts.get_ncpus();
    ASSERT_GT(ncpus, 0);
    // one aggregation slot per kernel thread (indexed by worker id)
    aggregation_lock.resize(opts.get_ncpus());
    aggregation_queue.resize(opts.get_ncpus());
    std::vector<std::string> keys = opts.get_engine_args().get_option_keys();
    foreach(std::string opt, keys) {
      if (opt == "timeout") {
        opts.get_engine_args().get_option("timeout", timed_termination);
        if (rmi.procid() == 0)
          logstream(LOG_EMPH) << "Engine Option: timeout = "
                              << timed_termination << std::endl;
      } else if (opt == "factorized") {
        opts.get_engine_args().get_option("factorized", factorized_consistency);
        if (rmi.procid() == 0)
          logstream(LOG_EMPH) << "Engine Option: factorized = "
                              << factorized_consistency << std::endl;
      } else if (opt == "nfibers") {
        opts.get_engine_args().get_option("nfibers", nfibers);
        if (rmi.procid() == 0)
          logstream(LOG_EMPH) << "Engine Option: nfibers = "
                              << nfibers << std::endl;
      } else if (opt == "track_task_time") {
        opts.get_engine_args().get_option("track_task_time", track_task_time);
        if (rmi.procid() == 0)
          logstream(LOG_EMPH) << "Engine Option: track_task_time = "
                              << track_task_time << std::endl;
      } else if (opt == "stacksize") {
        opts.get_engine_args().get_option("stacksize", stacksize);
        if (rmi.procid() == 0)
          logstream(LOG_EMPH) << "Engine Option: stacksize= "
                              << stacksize << std::endl;
      } else if (opt == "use_cache") {
        opts.get_engine_args().get_option("use_cache", use_cache);
        if (rmi.procid() == 0)
          logstream(LOG_EMPH) << "Engine Option: use_cache = "
                              << use_cache << std::endl;
      } else {
        // unrecognized engine options are fatal rather than silently ignored
        logstream(LOG_FATAL) << "Unexpected Engine Option: " << opt
                             << std::endl;
      }
    }
    opts_copy = opts;
    // set a default scheduler if none
    if (opts_copy.get_scheduler_type() == "") {
      opts_copy.set_scheduler_type("queued_fifo");
    }
    // construct scheduler passing in the copy of the options from set_options
    scheduler_ptr = scheduler_factory::
                    new_scheduler(graph.num_local_vertices(), opts_copy);
    rmi.barrier();

    // create initial fork arrangement based on the alternate vid mapping
    if (factorized_consistency == false) {
      // full consistency: distributed chandy-misra locking, with
      // lock_ready() invoked as each vertex's lock set becomes available
      cmlocks = new distributed_chandy_misra<graph_type>(rmi.dc(), graph,
                    boost::bind(&engine_type::lock_ready, this, _1));
    } else {
      cmlocks = NULL;
    }

    // construct the termination consensus object
    consensus = new fiber_async_consensus(rmi.dc(), nfibers);
  }

  /**
   * \internal
   * Initializes the engine with respect to the associated graph.
   * This call will initialize all internal and scheduling datastructures.
   * This function must be called prior to any signal function.
   */
  void init() {
    // construct all the required datastructures
    // deinitialize performs the reverse
    graph.finalize();
    scheduler_ptr->set_num_vertices(graph.num_local_vertices());
    messages.resize(graph.num_local_vertices());
    vertexlocks.resize(graph.num_local_vertices());
    program_running.resize(graph.num_local_vertices());
    hasnext.resize(graph.num_local_vertices());
    if (use_cache) {
      gather_cache.resize(graph.num_local_vertices(), gather_type());
      has_cache.resize(graph.num_local_vertices());
      has_cache.clear();
    }
    if (!factorized_consistency) {
      // fiber wake-up handles are only needed with chandy-misra locking
      cm_handles.resize(graph.num_local_vertices());
    }
    rmi.barrier();
  }

 public:

  ~async_consistent_engine() {
    // cmlocks may be NULL under factorized consistency;
    // deleting a NULL pointer is well-defined
    delete consensus;
    delete cmlocks;
    delete scheduler_ptr;
  }

  // documentation inherited from iengine
  size_t num_updates() const {
    return programs_executed.value;
  }

  // documentation inherited from iengine
  float elapsed_seconds() const {
    return timer::approx_time_seconds() - engine_start_time;
  }

  /**
   * \brief Not meaningful for the asynchronous engine. Returns -1.
*/
  int iteration() const { return -1; }


  /**************************************************************************
   *                           Signaling Interface                          *
   **************************************************************************/

 private:

  /**
   * \internal
   * This is used to receive a message forwarded from another machine
   */
  void rpc_signal(vertex_id_type vid, const message_type& message) {
    if (force_stop) return;
    const lvid_type local_vid = graph.local_vid(vid);
    double priority;
    messages.add(local_vid, message, &priority);
    scheduler_ptr->schedule(local_vid, priority);
    // wake any fibers attempting consensus termination
    consensus->cancel();
  }

  /**
   * \internal
   * \brief Signals a vertex with an optional message
   *
   * Signals a vertex, and schedules it to be executed in the future.
   * must be called on a vertex accessible by the current machine.
   */
  void internal_signal(const vertex_type& vtx,
                       const message_type& message = message_type()) {
    if (force_stop) return;
    if (started) {
      const typename graph_type::vertex_record& rec =
          graph.l_get_vertex_record(vtx.local_id());
      const procid_t owner = rec.owner;
      if (endgame_mode) {
        // fast signal. push to the remote machine immediately
        if (owner != rmi.procid()) {
          const vertex_id_type vid = rec.gvid;
          rmi.remote_call(owner, &engine_type::rpc_signal, vid, message);
        } else {
          double priority;
          messages.add(vtx.local_id(), message, &priority);
          scheduler_ptr->schedule(vtx.local_id(), priority);
          consensus->cancel();
        }
      } else {
        // normal mode: always buffer locally; a non-owner replica's message
        // is forwarded to the owner later by eval_sched_task
        double priority;
        messages.add(vtx.local_id(), message, &priority);
        scheduler_ptr->schedule(vtx.local_id(), priority);
        consensus->cancel();
      }
    } else {
      // engine not yet started: queue locally for the first run
      double priority;
      messages.add(vtx.local_id(), message, &priority);
      scheduler_ptr->schedule(vtx.local_id(), priority);
      consensus->cancel();
    }
  } // end of schedule

  /**
   * \internal
   * \brief Signals a vertex with an optional message
   *
   * Signals a global vid, and schedules it to be executed in the future.
   * If current machine does not contain the vertex, it is ignored.
   */
  void internal_signal_gvid(vertex_id_type gvid,
                            const message_type& message = message_type()) {
    if (force_stop) return;
    if (graph.is_master(gvid)) {
      internal_signal(graph.vertex(gvid), message);
    } else {
      // forward to the owning machine
      procid_t proc = graph.master(gvid);
      rmi.remote_call(proc, &async_consistent_engine::internal_signal_gvid,
                      gvid, message);
    }
  }

  void rpc_internal_stop() {
    force_stop = true;
    termination_reason = execution_status::FORCED_ABORT;
  }

  /**
   * \brief Force engine to terminate immediately.
   *
   * This function is used to stop the engine execution by forcing
   * immediate termination.
   */
  void internal_stop() {
    // broadcast the stop request to every machine (including self)
    for (procid_t i = 0;i < rmi.numprocs(); ++i) {
      rmi.remote_call(i, &async_consistent_engine::rpc_internal_stop);
    }
  }

  /**
   * \brief Post a to a previous gather for a give vertex.
   *
   * This function is called by the \ref graphlab::context.
   *
   * @param [in] vertex The vertex to which to post a change in the sum
   * @param [in] delta The change in that sum
   */
  void internal_post_delta(const vertex_type& vertex,
                           const gather_type& delta) {
    if(use_cache) {
      const lvid_type lvid = vertex.local_id();
      vertexlocks[lvid].lock();
      if( has_cache.get(lvid) ) {
        gather_cache[lvid] += delta;
      } else {
        // You cannot add a delta to an empty cache.  A complete
        // gather must have been run.
        // gather_cache[lvid] = delta;
        // has_cache.set_bit(lvid);
      }
      vertexlocks[lvid].unlock();
    }
  }

  /**
   * \brief Clear the cached gather for a vertex if one is
   * available.
   *
   * This function is called by the \ref graphlab::context.
   *
   * @param [in] vertex the vertex for which to clear the cache
   */
  void internal_clear_gather_cache(const vertex_type& vertex) {
    const lvid_type lvid = vertex.local_id();
    if(use_cache && has_cache.get(lvid)) {
      vertexlocks[lvid].lock();
      gather_cache[lvid] = gather_type();
      has_cache.clear_bit(lvid);
      vertexlocks[lvid].unlock();
    }
  }

 public:

  void signal(vertex_id_type gvid,
              const message_type& message = message_type()) {
    // barriers ensure all machines issue their signals together
    rmi.barrier();
    internal_signal_gvid(gvid, message);
    rmi.barrier();
  }

  void signal_all(const message_type& message = message_type(),
                  const std::string& order = "shuffle") {
    vertex_set vset = graph.complete_set();
    signal_vset(vset, message, order);
  } // end of schedule all

  void signal_vset(const vertex_set& vset,
                   const message_type& message = message_type(),
                   const std::string& order = "shuffle") {
    logstream(LOG_DEBUG) << rmi.procid() << ": Schedule All" << std::endl;
    // allocate a vector with all the local owned vertices
    // and schedule all of them.
    // (note: the vector holds LOCAL vertex ids even though it is typed
    // with vertex_id_type)
    std::vector<vertex_id_type> vtxs;
    vtxs.reserve(graph.num_local_own_vertices());
    for(lvid_type lvid = 0;
        lvid < graph.get_local_graph().num_vertices();
        ++lvid) {
      if (graph.l_vertex(lvid).owner() == rmi.procid() &&
          vset.l_contains(lvid)) {
        vtxs.push_back(lvid);
      }
    }
    if(order == "shuffle") {
      graphlab::random::shuffle(vtxs.begin(), vtxs.end());
    }
    foreach(lvid_type lvid, vtxs) {
      double priority;
      messages.add(lvid, message, &priority);
      scheduler_ptr->schedule(lvid, priority);
    }
    rmi.barrier();
  }

 private:

  /**
   * Gets a task from the scheduler and the associated message
   */
  sched_status::status_enum get_next_sched_task(
      size_t threadid, lvid_type& lvid, message_type& msg) {
    while (1) {
      sched_status::status_enum stat =
          scheduler_ptr->get_next(threadid % ncpus, lvid);
      if (stat == sched_status::NEW_TASK) {
        // the scheduler entry is only valid if a message is still pending;
        // otherwise the task was already consumed, so keep polling
        if (messages.get(lvid, msg)) return stat;
        else continue;
      }
      return stat;
    }
  }

  void set_endgame_mode() {
    if (!endgame_mode) logstream(LOG_EMPH) << "Endgame mode\n";
    endgame_mode = true;
    // eagerly deliver RPC requests for faster termination detection
    rmi.dc().set_fast_track_requests(true);
  }
/**
   * \internal
   * Called when get_a_task returns no internal task not a scheduler task.
   * This rechecks the status of the internal task queue and the scheduler
   * inside a consensus critical section.
   *
   * Returns true if the fiber should terminate. If a task arrived while
   * attempting to quit, returns false with has_sched_msg set and the task
   * placed in sched_lvid / msg.
   */
  bool try_to_quit(size_t threadid,
                   bool& has_sched_msg,
                   lvid_type& sched_lvid,
                   message_type &msg) {
    // honor the user-specified timeout, if any
    if (timer::approx_time_seconds() - engine_start_time > timed_termination) {
      termination_reason = execution_status::TIMEOUT;
      force_stop = true;
    }
    fiber_control::yield();
    logstream(LOG_DEBUG) << rmi.procid() << "-" << threadid <<  ": "
                         << "Termination Attempt " << std::endl;
    has_sched_msg = false;
    // double-check the scheduler inside the consensus critical section;
    // any concurrent signal will cancel the critical section
    consensus->begin_done_critical_section(threadid);
    sched_status::status_enum stat =
        get_next_sched_task(threadid, sched_lvid, msg);
    if (stat == sched_status::EMPTY || force_stop) {
      logstream(LOG_DEBUG) << rmi.procid() << "-" << threadid <<  ": "
                           << "\tTermination Double Checked" << std::endl;
      if (!endgame_mode) logstream(LOG_EMPH) << "Endgame mode\n";
      endgame_mode = true;
      // put everyone in endgame
      for (procid_t i = 0;i < rmi.dc().numprocs(); ++i) {
        rmi.remote_call(i, &async_consistent_engine::set_endgame_mode);
      }
      bool ret = consensus->end_done_critical_section(threadid);
      if (ret == false) {
        logstream(LOG_DEBUG) << rmi.procid() << "-" << threadid <<  ": "
                             << "\tCancelled" << std::endl;
      } else {
        logstream(LOG_DEBUG) << rmi.procid() << "-" << threadid <<  ": "
                             << "\tDying"
                             << " (" << fiber_control::get_tid() << ")"
                             << std::endl;
      }
      return ret;
    } else {
      // a task appeared while we were trying to quit; hand it back
      logstream(LOG_DEBUG) << rmi.procid() << "-" << threadid <<  ": "
                           << "\tCancelled by Scheduler Task" << std::endl;
      consensus->cancel_critical_section(threadid);
      has_sched_msg = true;
      return false;
    }
  } // end of try to quit

  /**
   * \internal
   * When all distributed locks are acquired, this function is called
   * from the chandy misra implementation on the master vertex.
* Here, we perform initialization
   * of the task and switch the vertex to a gathering state
   */
  void lock_ready(lvid_type lvid) {
    // wake the fiber parked in eval_sched_task waiting for the lock set
    cm_handles[lvid]->lock.lock();
    cm_handles[lvid]->philosopher_ready = true;
    fiber_control::schedule_tid(cm_handles[lvid]->fiber_handle);
    cm_handles[lvid]->lock.unlock();
  }

  /**
   * Runs the gather over the local replica of vid and returns the
   * (conditionally present) accumulated result. Invoked locally on the
   * master and via RPC on each mirror.
   */
  conditional_gather_type perform_gather(vertex_id_type vid,
                                         vertex_program_type& vprog_) {
    // work on a private copy of the vertex program
    vertex_program_type vprog = vprog_;
    lvid_type lvid = graph.local_vid(vid);
    local_vertex_type local_vertex(graph.l_vertex(lvid));
    vertex_type vertex(local_vertex);
    context_type context(*this, graph);
    edge_dir_type gather_dir = vprog.gather_edges(context, vertex);
    conditional_gather_type accum;
    //check against the cache
    if( use_cache && has_cache.get(lvid) ) {
      accum.set(gather_cache[lvid]);
      return accum;
    }
    // do in edges
    // both endpoints are locked in ascending lvid order to avoid deadlock
    // with concurrent gathers/scatters on shared edges
    if(gather_dir == IN_EDGES || gather_dir == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.in_edges()) {
        edge_type edge(local_edge);
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        vertexlocks[std::min(a,b)].lock();
        vertexlocks[std::max(a,b)].lock();
        accum += vprog.gather(context, vertex, edge);
        vertexlocks[a].unlock();
        vertexlocks[b].unlock();
      }
    }
    // do out edges
    if(gather_dir == OUT_EDGES || gather_dir == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.out_edges()) {
        edge_type edge(local_edge);
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        vertexlocks[std::min(a,b)].lock();
        vertexlocks[std::max(a,b)].lock();
        accum += vprog.gather(context, vertex, edge);
        vertexlocks[a].unlock();
        vertexlocks[b].unlock();
      }
    }
    if (use_cache) {
      gather_cache[lvid] = accum.value;
      has_cache.set_bit(lvid);
    }
    return accum;
  }

  /**
   * Runs the scatter over the local replica's edges and, when chandy-misra
   * locking is active, releases this replica's locks.
   */
  void perform_scatter_local(lvid_type lvid, vertex_program_type& vprog) {
    local_vertex_type local_vertex(graph.l_vertex(lvid));
    vertex_type vertex(local_vertex);
    context_type context(*this, graph);
    edge_dir_type scatter_dir = vprog.scatter_edges(context, vertex);
    if(scatter_dir == IN_EDGES || scatter_dir == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.in_edges()) {
        edge_type edge(local_edge);
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        vertexlocks[std::min(a,b)].lock();
        vertexlocks[std::max(a,b)].lock();
        vprog.scatter(context, vertex, edge);
        vertexlocks[a].unlock();
        vertexlocks[b].unlock();
      }
    }
    if(scatter_dir == OUT_EDGES || scatter_dir == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.out_edges()) {
        edge_type edge(local_edge);
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        vertexlocks[std::min(a,b)].lock();
        vertexlocks[std::max(a,b)].lock();
        vprog.scatter(context, vertex, edge);
        vertexlocks[a].unlock();
        vertexlocks[b].unlock();
      }
    }
    // release locks
    if (!factorized_consistency) {
      cmlocks->philosopher_stops_eating_per_replica(lvid);
    }
  }

  /**
   * Installs the post-apply vertex data on this replica, then runs the
   * local scatter. Invoked via RPC on mirrors by eval_sched_task.
   */
  void perform_scatter(vertex_id_type vid,
                       vertex_program_type& vprog_,
                       const vertex_data_type& newdata) {
    vertex_program_type vprog = vprog_;
    lvid_type lvid = graph.local_vid(vid);
    vertexlocks[lvid].lock();
    graph.l_vertex(lvid).data() = newdata;
    vertexlocks[lvid].unlock();
    perform_scatter_local(lvid, vprog);
  }

  // make sure I am the only person running.
  // if returns false, someone else was already running this vertex; the
  // message has been dropped into the message array (and hasnext set) so
  // the vertex will be rescheduled when the running program releases it.
  bool get_exclusive_access_to_vertex(const lvid_type lvid,
                                      const message_type& msg) {
    vertexlocks[lvid].lock();
    bool someone_else_running = program_running.set_bit(lvid);
    if (someone_else_running) {
      // bad. someone else is here.
      // drop it into the message array
      messages.add(lvid, msg);
      hasnext.set_bit(lvid);
    }
    vertexlocks[lvid].unlock();
    return !someone_else_running;
  }

  // Counterpart to get_exclusive_access_to_vertex: clears the running bit
  // and reschedules the vertex if a message arrived while it was running.
// releases exclusive execution rights on lvid (see comment above)
  void release_exclusive_access_to_vertex(const lvid_type lvid) {
    vertexlocks[lvid].lock();
    // someone left a next message for me
    // reschedule it at high priority
    if (hasnext.get(lvid)) {
      scheduler_ptr->schedule(lvid, 10000.0);
      consensus->cancel();
      hasnext.clear_bit(lvid);
    }
    program_running.clear_bit(lvid);
    vertexlocks[lvid].unlock();
  }

  /**
   * \internal
   * Called when the scheduler returns a vertex to run.
   * If this function is called with vertex locks acquired, prelocked
   * should be true. Otherwise it should be false.
   *
   * Runs the full init/gather/apply/scatter cycle for one task: forwards
   * non-owned tasks to the owner, acquires exclusive access (and, without
   * factorized consistency, the distributed chandy-misra lock set), fans
   * the gather/scatter out to mirrors via fiber remote requests, and
   * finally releases everything.
   */
  void eval_sched_task(const lvid_type lvid, const message_type& msg) {
    const typename graph_type::vertex_record& rec =
        graph.l_get_vertex_record(lvid);
    vertex_id_type vid = rec.gvid;
    // stack-allocated storage for an optional timer; constructed by
    // placement new only when track_task_time is enabled
    char task_time_data[sizeof(timer)];
    timer* task_time;
    if (track_task_time) {
      // placement new to create the timer
      task_time = reinterpret_cast<timer*>(task_time_data);
      new (task_time) timer();
      // NOTE(review): the early-return paths below skip the explicit
      // ~timer() call; this appears benign only if timer is trivially
      // destructible -- confirm
    }
    // if this is another machine's forward it
    if (rec.owner != rmi.procid()) {
      rmi.remote_call(rec.owner, &engine_type::rpc_signal, vid, msg);
      return;
    }
    // I have to run this myself
    if (!get_exclusive_access_to_vertex(lvid, msg)) return;

    /**************************************************************************/
    /*                           Acquire Locks                                */
    /**************************************************************************/
    if (!factorized_consistency) {
      // begin lock acquisition; park this fiber until lock_ready() fires
      cm_handles[lvid] = new vertex_fiber_cm_handle;
      cm_handles[lvid]->philosopher_ready = false;
      cm_handles[lvid]->fiber_handle = fiber_control::get_tid();
      cmlocks->make_philosopher_hungry(lvid);
      cm_handles[lvid]->lock.lock();
      while (!cm_handles[lvid]->philosopher_ready) {
        fiber_control::deschedule_self(&(cm_handles[lvid]->lock.m_mut));
        cm_handles[lvid]->lock.lock();
      }
      cm_handles[lvid]->lock.unlock();
    }

    /**************************************************************************/
    /*                            Begin Program                               */
    /**************************************************************************/
    context_type context(*this, graph);
    vertex_program_type vprog = vertex_program_type();
    local_vertex_type local_vertex(graph.l_vertex(lvid));
    vertex_type vertex(local_vertex);

    /**************************************************************************/
    /*                              init phase                                */
    /**************************************************************************/
    vprog.init(context, vertex, msg);

    /**************************************************************************/
    /*                             Gather Phase                               */
    /**************************************************************************/
    // launch gathers on all mirrors concurrently, overlap with the local
    // gather, then sum the results
    conditional_gather_type gather_result;
    std::vector<request_future<conditional_gather_type> > gather_futures;
    foreach(procid_t mirror, local_vertex.mirrors()) {
      gather_futures.push_back(
          object_fiber_remote_request(rmi, mirror,
                                      &async_consistent_engine::perform_gather,
                                      vid, vprog));
    }
    gather_result += perform_gather(vid, vprog);
    for(size_t i = 0;i < gather_futures.size(); ++i) {
      gather_result += gather_futures[i]();
    }

    /**************************************************************************/
    /*                              apply phase                               */
    /**************************************************************************/
    vertexlocks[lvid].lock();
    vprog.apply(context, vertex, gather_result.value);
    vertexlocks[lvid].unlock();

    /**************************************************************************/
    /*                             scatter phase                              */
    /**************************************************************************/
    // should I wait for the scatter? nah... but in case you want to
    // the code is commented below
    /*foreach(procid_t mirror, local_vertex.mirrors()) {
      rmi.remote_call(mirror,
                      &async_consistent_engine::perform_scatter,
                      vid, vprog, local_vertex.data());
    }*/
    std::vector<request_future<void> > scatter_futures;
    foreach(procid_t mirror, local_vertex.mirrors()) {
      scatter_futures.push_back(
          object_fiber_remote_request(rmi, mirror,
                                      &async_consistent_engine::perform_scatter,
                                      vid, vprog, local_vertex.data()));
    }
    perform_scatter_local(lvid, vprog);
    for(size_t i = 0;i < scatter_futures.size(); ++i) scatter_futures[i]();

    /************************************************************************/
    /*                           Release Locks                              */
    /************************************************************************/
    // the scatter is used to release the chandy misra
    // here I cleanup
    if (!factorized_consistency) {
      delete cm_handles[lvid];
      cm_handles[lvid] = NULL;
    }
    release_exclusive_access_to_vertex(lvid);
    if (track_task_time) {
      total_completion_time[fiber_control::get_worker_id()] +=
          task_time->current_time();
      task_time->~timer();
    }
    programs_executed.inc();
  }

  /**
   * \internal
   * Per thread main loop
   */
  void thread_start(size_t threadid) {
    bool has_sched_msg = false;
    std::vector<std::vector<lvid_type> > internal_lvid;
    lvid_type sched_lvid;
    message_type msg;
    float last_aggregator_check = timer::approx_time_seconds();
    timer ti; ti.start();
    while(1) {
      // at most once per (approximate) second, tick the aggregator and
      // broadcast any triggered key to every worker's queue
      if (timer::approx_time_seconds() != last_aggregator_check &&
          !endgame_mode) {
        last_aggregator_check = timer::approx_time_seconds();
        std::string key = aggregator.tick_asynchronous();
        if (key != "") {
          for (size_t i = 0;i < aggregation_lock.size(); ++i) {
            aggregation_lock[i].lock();
            aggregation_queue[i].push_back(key);
            aggregation_lock[i].unlock();
          }
        }
      }
      // test the aggregator
      while(!aggregation_queue[fiber_control::get_worker_id()].empty()) {
        size_t wid = fiber_control::get_worker_id();
        ASSERT_LT(wid, ncpus);
        aggregation_lock[wid].lock();
        std::string key = aggregation_queue[wid].front();
        aggregation_queue[wid].pop_front();
        aggregation_lock[wid].unlock();
        aggregator.tick_asynchronous_compute(wid, key);
      }
      sched_status::status_enum stat =
          get_next_sched_task(threadid, sched_lvid, msg);
      has_sched_msg = stat != sched_status::EMPTY;
      if (stat != sched_status::EMPTY) {
        eval_sched_task(sched_lvid, msg);
        if (endgame_mode) rmi.dc().flush();
      } else if (!try_to_quit(threadid, has_sched_msg, sched_lvid, msg)) {
        /*
         * We failed to obtain a task, try to quit
         */
        if (has_sched_msg) {
          eval_sched_task(sched_lvid, msg);
        }
      } else {
        break;
      }
      if (fiber_control::worker_has_priority_fibers_on_queue()) {
        fiber_control::yield();
      }
    }
  } // end of thread start


  /**************************************************************************
   *                         Main engine start()                            *
   **************************************************************************/

 public:

  /**
   * \brief Start the engine execution.
   *
   * This function starts the engine and does not
   * return until the scheduler has no tasks remaining.
   *
   * \return the reason for termination
   */
  execution_status::status_enum start() {
    bool old_fasttrack = rmi.dc().set_fast_track_requests(false);
    logstream(LOG_INFO) << "Spawning " << nfibers << " threads" << std::endl;
    ASSERT_TRUE(scheduler_ptr != NULL);
    consensus->reset();
    // now. It is of critical importance that we match the number of
    // actual workers

    // start the aggregator
    aggregator.start(ncpus);
    aggregator.aggregate_all_periodic();

    started = true;
    rmi.barrier();
    size_t allocatedmem = memory_info::allocated_bytes();
    rmi.all_reduce(allocatedmem);

    engine_start_time = timer::approx_time_seconds();
    force_stop = false;
    endgame_mode = false;
    programs_executed = 0;
    launch_timer.start();

    termination_reason = execution_status::RUNNING;
    if (rmi.procid() == 0) {
      logstream(LOG_INFO) << "Total Allocated Bytes: " << allocatedmem
                          << std::endl;
    }
    // distribute the fibers round-robin over the available worker threads
    thrgroup.set_stacksize(stacksize);
    size_t effncpus = std::min(ncpus,
                               fiber_control::get_instance().num_workers());
    for (size_t i = 0; i < nfibers ; ++i) {
      thrgroup.launch(boost::bind(&engine_type::thread_start, this, i),
                      i % effncpus);
    }
    thrgroup.join();
    aggregator.stop();
    // if termination reason was not changed, then it must be depletion
    if (termination_reason == execution_status::RUNNING) {
      termination_reason = execution_status::TASK_DEPLETION;
    }

    // aggregate and report counters across all machines
    size_t ctasks = programs_executed.value;
    rmi.all_reduce(ctasks);
    programs_executed.value = ctasks;
    rmi.cout() << "Completed Tasks: " << programs_executed.value << std::endl;

    size_t numjoins = messages.num_joins();
    rmi.all_reduce(numjoins);
    rmi.cout() << "Schedule Joins: " << numjoins << std::endl;

    size_t numadds = messages.num_adds();
    rmi.all_reduce(numadds);
    rmi.cout() << "Schedule Adds: " << numadds << std::endl;

    if (track_task_time) {
      double total_task_time = 0;
      for (size_t i = 0;i < total_completion_time.size(); ++i) {
        total_task_time += total_completion_time[i];
      }
      rmi.all_reduce(total_task_time);
      rmi.cerr() << "Average Task Completion Time = "
                 << total_task_time / programs_executed.value << std::endl;
    }

    ASSERT_TRUE(scheduler_ptr->empty());
    started = false;
    rmi.dc().set_fast_track_requests(old_fasttrack);
    return termination_reason;
  } // end of start

 public:

  aggregator_type* get_aggregator() { return &aggregator; }

}; // end of class
} // namespace

#include
<graphlab/macros_undef.hpp>
#endif // GRAPHLAB_DISTRIBUTED_ENGINE_HPP



================================================
FILE: src/graphlab/engine/distributed_chandy_misra.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.  See the License for the specific language
 *  governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_DISTRIBUTED_CHANDY_MISRA_HPP
#define GRAPHLAB_DISTRIBUTED_CHANDY_MISRA_HPP
#include <vector>
#include <graphlab/rpc/dc_dist_object.hpp>
#include <graphlab/rpc/distributed_event_log.hpp>
#include <graphlab/logger/assertions.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/macros_def.hpp>
namespace graphlab {

/**
 * Distributed implementation of the Chandy-Misra "dining philosophers"
 * protocol over the edges of a distributed graph: each edge is a fork
 * shared by its two endpoint vertices (philosophers), and a vertex may
 * run only while holding all of its forks.
 * \internal
 */
template <typename GraphType>
class distributed_chandy_misra {
 public:
  typedef typename GraphType::local_vertex_type local_vertex_type;
  typedef typename GraphType::local_edge_type local_edge_type;
  typedef typename GraphType::vertex_id_type vertex_id_type;
  typedef typename GraphType::lvid_type lvid_type;
  typedef distributed_chandy_misra<GraphType> dcm_type;
  dc_dist_object<dcm_type> rmi;
  GraphType& graph;

  // invoked on the master vertex when its complete lock set is acquired
  boost::function<void(lvid_type)> callback;
  boost::function<void(lvid_type)> hors_doeuvre_callback;
  /*
   * Each "fork" is one character.
   * bit 0: owner. if 0 is src. if 1 is target
   * bit 1: clean = 0, dirty = 1
   * bit 2: owner 0 request
   * bit 3: owner 1 request
   */
  std::vector<unsigned char> forkset;
  enum { OWNER_BIT = 1, DIRTY_BIT = 2, REQUEST_0 = 4, REQUEST_1 = 8 };
  enum { OWNER_SOURCE = 0, OWNER_TARGET = 1 };
  inline unsigned char request_bit(bool owner) {
    return owner ? REQUEST_1 : REQUEST_0;
  }

  // event counter ids (currently only used by commented-out event logging)
  enum { COLLISIONS = 0, CANCELLATIONS = 1, ACCEPTED_CANCELLATIONS = 2 };

  struct philosopher {
    vertex_id_type num_edges;       // total forks this vertex needs
    vertex_id_type forks_acquired;  // forks currently held
    simple_spinlock lock;
    unsigned char state;            // THINKING/HUNGRY/HORS_DOEUVRE/EATING
    unsigned char counter;
    bool cancellation_sent;
    bool lockid;
  };
  std::vector<philosopher> philosopherset;
  atomic<size_t> clean_fork_count;

  /*
   * Possible values for the philosopher state
   */
  enum { THINKING = 0, HUNGRY = 1, HORS_DOEUVRE = 2, EATING = 3 };

  /** Places a request for the fork. Requires fork to be locked */
  inline void request_for_fork(size_t forkid, bool nextowner) {
    // atomic OR so concurrent requests on the same fork are not lost
    __sync_fetch_and_or(&forkset[forkid], request_bit(nextowner));
  }

  inline bool fork_owner(size_t forkid) {
    return forkset[forkid] & OWNER_BIT;
  }

  inline bool fork_dirty(size_t forkid) {
    return !!(forkset[forkid] & DIRTY_BIT);
  }

  inline void dirty_fork(size_t forkid) {
    if ((forkset[forkid] & DIRTY_BIT) == 0) clean_fork_count.dec();
    __sync_fetch_and_or(&forkset[forkid], DIRTY_BIT);
  }

  void compute_initial_fork_arrangement() {
    for (lvid_type i = 0;i < graph.num_local_vertices(); ++i) {
      local_vertex_type lvertex(graph.l_vertex(i));
      philosopherset[i].num_edges =
          lvertex.num_in_edges() + lvertex.num_out_edges();
      philosopherset[i].state = THINKING;
      philosopherset[i].forks_acquired = 0;
      philosopherset[i].counter = 0;
      philosopherset[i].cancellation_sent = false;
      philosopherset[i].lockid = false;
    }
    // each fork starts dirty, held by the endpoint with the larger
    // global vid (an acyclic initial orientation, which Chandy-Misra
    // requires for deadlock freedom)
    for (lvid_type i = 0;i < graph.num_local_vertices(); ++i) {
      local_vertex_type lvertex(graph.l_vertex(i));
      foreach(local_edge_type edge, lvertex.in_edges()) {
        if (edge.source().global_id() > edge.target().global_id()) {
          forkset[edge.id()] = DIRTY_BIT | OWNER_TARGET;
          philosopherset[edge.target().id()].forks_acquired++;
        } else {
          forkset[edge.id()] = DIRTY_BIT | OWNER_SOURCE;
          philosopherset[edge.source().id()].forks_acquired++;
        }
      }
    }
  }

  /**
   * We already have v1, we want to acquire v2.
   * When this function returns, both v1 and v2 locks are acquired
   * (locks are ultimately held in ascending lvid order to avoid deadlock)
   */
  void try_acquire_edge_with_backoff(lvid_type v1, lvid_type v2) {
    if (v1 < v2) {
      philosopherset[v2].lock.lock();
    } else if (!philosopherset[v2].lock.try_lock()) {
      // back off: release v1 and re-acquire both in ascending order
      philosopherset[v1].lock.unlock();
      philosopherset[v2].lock.lock();
      philosopherset[v1].lock.lock();
    }
  }

  /****************************************************************************
   * Tries to move a requested fork
   *
   * Pseudocode:
   *  If current owner is hungry and fork is clean
   *    Ignore
   *  ElseIf current owner is Thinking
   *    Relinquish fork immediately and clear the request flag
   *  ElseIf current owner is hors_doeuvre and fork is clean
   *    Ignore
   *  ElseIf current owner is hors_doeuvre and fork is dirty
   *    Send cancellation message
   *    Set cancelsent
   *  End
   * Return true if changes were made
   ***************************************************************************/
  inline bool advance_fork_state_on_lock(size_t forkid,
                                         lvid_type source,
                                         lvid_type target) {
    unsigned char currentowner = forkset[forkid] & OWNER_BIT;
    if (currentowner == OWNER_SOURCE) {
      // if the current owner is not eating, and the
      // fork is dirty and other side has placed a request
      if (philosopherset[source].state != EATING &&
          (forkset[forkid] & DIRTY_BIT) &&
          (forkset[forkid] & REQUEST_1)) {
        if (philosopherset[source].state != HORS_DOEUVRE) {
          // change the owner and clean the fork)
          forkset[forkid] = OWNER_TARGET;
          clean_fork_count.inc();
          if (philosopherset[source].state == HUNGRY) {
            // previous owner still wants it back: re-register its request
            forkset[forkid] |= REQUEST_0;
          }
          philosopherset[source].forks_acquired--;
          philosopherset[target].forks_acquired++;
          return true;
        } else if (philosopherset[source].cancellation_sent == false) {
          //PERMANENT_ACCUMULATE_DIST_EVENT(eventlog, CANCELLATIONS, 1);
          philosopherset[source].cancellation_sent = true;
          bool lockid = philosopherset[source].lockid;
          // both spinlocks must be dropped before issuing the cancellation
          // (it may RPC to the master); reacquire in ascending order after
          philosopherset[source].lock.unlock();
          philosopherset[target].lock.unlock();
          issue_cancellation_request_unlocked(source, lockid);
          philosopherset[std::min(source, target)].lock.lock();
          philosopherset[std::max(source, target)].lock.lock();
        }
      }
    } else {
      // if the current owner is not eating, and the
      // fork is dirty and other side has placed a request
      if (philosopherset[target].state != EATING &&
          (forkset[forkid] & DIRTY_BIT) &&
          (forkset[forkid] & REQUEST_0)) {
        // change the owner and clean the fork)
        if (philosopherset[target].state != HORS_DOEUVRE) {
          forkset[forkid] = OWNER_SOURCE;
          clean_fork_count.inc();
          if (philosopherset[target].state == HUNGRY) {
            forkset[forkid] |= REQUEST_1;
          }
          philosopherset[source].forks_acquired++;
          philosopherset[target].forks_acquired--;
          return true;
        } else if (philosopherset[target].cancellation_sent == false) {
          //PERMANENT_ACCUMULATE_DIST_EVENT(eventlog, CANCELLATIONS, 1);
          philosopherset[target].cancellation_sent = true;
          bool lockid = philosopherset[target].lockid;
          philosopherset[source].lock.unlock();
          philosopherset[target].lock.unlock();
          issue_cancellation_request_unlocked(target, lockid);
          philosopherset[std::min(source, target)].lock.lock();
          philosopherset[std::max(source, target)].lock.lock();
        }
      }
    }
    //PERMANENT_ACCUMULATE_DIST_EVENT(eventlog, COLLISIONS, 1);
    return false;
  }

  /****************************************************************************
   * Performs a cancellation on a vertex.
   *
   * If lockIds do not match, ignore
   * If counter == 0 ignore
   * Otherwise, counter++ and reply cancellation accept.
   * Unfortunately, I cannot perform a local call here even if I am the
   * owner since this may produce a lock cycle. Regardless of whether
Regardless of whether
 * the owner is local or not, this must be performed by a remote call
 ***************************************************************************/
// Handle a cancellation request for local vertex `lvid` issued by machine
// `requestor`. The request is honored only when `lockid` matches the vertex's
// current lock generation AND the acknowledgement counter is still positive
// (i.e. the lock has not already completed).
void cancellation_request_unlocked(lvid_type lvid, procid_t requestor, bool lockid) {
  philosopherset[lvid].lock.lock();
  if (philosopherset[lvid].lockid == lockid) {
    if (philosopherset[lvid].counter > 0) {
      /*ASSERT_TRUE(philosopherset[lvid].state == HORS_DOEUVRE ||
                    philosopherset[lvid].state == HORS_DOEUVRE ||
                    philosopherset[lvid].state == HUNGRY);*/
      // Undo one pending "ready" acknowledgement.
      ++philosopherset[lvid].counter;
      // NOTE(review): this local `lockid` shadows the parameter of the same
      // name. Both hold the same value here (the enclosing branch requires
      // philosopherset[lvid].lockid == lockid), so behavior is unchanged,
      // but the shadowing is worth cleaning up eventually.
      bool lockid = philosopherset[lvid].lockid;
      //PERMANENT_ACCUMULATE_DIST_EVENT(eventlog, ACCEPTED_CANCELLATIONS, 1);
      vertex_id_type gvid = graph.global_vid(lvid);
      logstream(LOG_DEBUG) << rmi.procid() << ": Cancellation accepted on "
                           << gvid
                           << "(" << (int)philosopherset[lvid].counter << ")"
                           << std::endl;
      philosopherset[lvid].lock.unlock();
      if (requestor != rmi.procid()) {
        // Serialize RPCs touching this vertex through a per-vertex
        // sequentialization key (gvid % 254 + 1 keeps the key non-zero).
        unsigned char pkey = rmi.dc().set_sequentialization_key(gvid % 254 + 1);
        rmi.remote_call(requestor,
                        &dcm_type::rpc_cancellation_accept,
                        gvid,
                        lockid);
        rmi.dc().set_sequentialization_key(pkey);
      }
      else {
        cancellation_accept_unlocked(lvid, lockid);
      }
    }
    else {
      // Counter already hit zero: the lock acquisition finished; deny.
      philosopherset[lvid].lock.unlock();
      logstream(LOG_DEBUG) << rmi.procid() << ": Cancellation on "
                           << graph.global_vid(lvid)
                           << " denied due to lock completion" << std::endl;
    }
  }
  else {
    // Stale request from an earlier lock generation; deny.
    philosopherset[lvid].lock.unlock();
    logstream(LOG_DEBUG) << rmi.procid() << ": Cancellation on "
                         << graph.global_vid(lvid)
                         << " denied to invalid lock ID" << std::endl;
  }
}

// RPC entry point: translate the global vid and forward to the local handler.
void rpc_cancellation_request(vertex_id_type gvid, procid_t requestor, bool lockid) {
  lvid_type lvid = graph.local_vid(gvid);
  cancellation_request_unlocked(lvid, requestor, lockid);
}

// Send a cancellation request for `lvid` to its owning machine (or handle it
// locally if this machine is the owner). Caller must NOT hold the
// philosopher's lock (see the comment above cancellation_request_unlocked).
void issue_cancellation_request_unlocked(lvid_type lvid, bool lockid) {
  // signal the master
  logstream(LOG_DEBUG) << rmi.procid() << ": Requesting cancellation on "
                       << graph.global_vid(lvid) << std::endl;
  local_vertex_type lvertex(graph.l_vertex(lvid));
  if (lvertex.owner() == rmi.procid()) {
    cancellation_request_unlocked(lvid, rmi.procid(), lockid);
  }
  else {
    unsigned char pkey = rmi.dc().set_sequentialization_key(lvertex.global_id() % 254 + 1);
    rmi.remote_call(lvertex.owner(),
                    &dcm_type::rpc_cancellation_request,
                    lvertex.global_id(),
                    rmi.procid(),
                    lockid);
    rmi.dc().set_sequentialization_key(pkey);
  }
}

/****************************************************************************
 * Accepts a cancellation on a vertex.
 *
 * Pseudocode:
 *   Change back to Hungry
 *   Releases all dirty forks
 ****************************************************************************/
void rpc_cancellation_accept(vertex_id_type gvid, bool lockid) {
  lvid_type lvid = graph.local_vid(gvid);
  cancellation_accept_unlocked(lvid, lockid);
}

// Apply an accepted cancellation on `p_id`: revert HORS_DOEUVRE -> HUNGRY and
// give away each dirty fork this philosopher owns. Neighbors that thereby
// collect their last fork are promoted to HORS_DOEUVRE and signalled (outside
// any lock) at the end. Uses try_acquire_edge_with_backoff for hand-over-hand
// locking of each adjacent philosopher pair.
void cancellation_accept_unlocked(lvid_type p_id, bool lockid) {
  std::vector<lvid_type> retval;
  philosopherset[p_id].lock.lock();
  //philosopher is now hungry!
  /*ASSERT_EQ (lockid, philosopherset[p_id].lockid);
    ASSERT_EQ((int)philosopherset[p_id].state, (int)HORS_DOEUVRE); */
  philosopherset[p_id].state = HUNGRY;
  philosopherset[p_id].cancellation_sent = false;
  local_vertex_type lvertex(graph.l_vertex(p_id));
  logstream(LOG_DEBUG) << rmi.procid() << ": Cancellation accept received on "
                       << lvertex.global_id() << " "
                       << philosopherset[p_id].state << std::endl;
  // for each fork I own, try to give it away
  foreach(local_edge_type edge, lvertex.in_edges()) {
    try_acquire_edge_with_backoff(edge.target().id(), edge.source().id());
    if (philosopherset[p_id].state == HUNGRY) {
      //std::cout << "\t" << graph.edge_id(edge) << ": " << edge.source() << "->" << edge.target() << std::endl;
      lvid_type other = edge.source().id();
      size_t edgeid = edge.id();
      if (fork_owner(edgeid) == OWNER_TARGET && fork_dirty(edgeid)) {
        // Hand the dirty fork over; if that was the neighbor's last missing
        // fork and it is HUNGRY, promote it to HORS_DOEUVRE.
        if (advance_fork_state_on_lock(edgeid, edge.source().id(), edge.target().id()) &&
            philosopherset[other].state == HUNGRY &&
            philosopherset[other].forks_acquired == philosopherset[other].num_edges) {
          philosopherset[other].state = HORS_DOEUVRE;
          philosopherset[other].cancellation_sent = false;
          // signal eating on other
          retval.push_back(other);
        }
      }
      philosopherset[edge.source().id()].lock.unlock();
    }
    else {
      // State changed under us (no longer HUNGRY); stop early.
      philosopherset[edge.source().id()].lock.unlock();
      break;
    }
  }
  //std::cout << "out edges: " << std::endl;
  foreach(local_edge_type edge, lvertex.out_edges()) {
    //std::cout << "\t" << graph.edge_id(edge) << ": " << edge.source() << "->" << edge.target() << std::endl;
    try_acquire_edge_with_backoff(edge.source().id(), edge.target().id());
    if (philosopherset[p_id].state == HUNGRY) {
      lvid_type other = edge.target().id();
      size_t edgeid = edge.id();
      if (fork_owner(edgeid) == OWNER_SOURCE && fork_dirty(edgeid)) {
        if (advance_fork_state_on_lock(edgeid, edge.source().id(), edge.target().id()) &&
            philosopherset[other].state == HUNGRY &&
            philosopherset[other].forks_acquired == philosopherset[other].num_edges) {
          philosopherset[other].state = HORS_DOEUVRE;
          philosopherset[other].cancellation_sent = false;
          // signal eating on other
          retval.push_back(other);
        }
      }
      philosopherset[edge.target().id()].lock.unlock();
    }
    else {
      philosopherset[edge.target().id()].lock.unlock();
      break;
    }
  }
  // I may have re-collected all my own forks in the meantime.
  if (philosopherset[p_id].state == HUNGRY &&
      philosopherset[p_id].forks_acquired == philosopherset[p_id].num_edges) {
    philosopherset[p_id].cancellation_sent = false;
    philosopherset[p_id].state = HORS_DOEUVRE;
    retval.push_back(p_id);
  }
  philosopherset[p_id].lock.unlock();
  // Signal promoted philosophers with no locks held.
  foreach(lvid_type lvid, retval) {
    enter_hors_doeuvre_unlocked(lvid);
  }
}

/****************************************************************************
 * Make Philosopher Hungry.
*
 * Pseudocode:
 *   Set Philosopher to Hungry
 *   For all edges adjacent to v with forks it does not own:
 *     Send request for fork to neighboring vertex
 *
 * Conditions:
 *   Must be Thinking
 *   New lock ID must not be the same as the old lock ID
 *
 * Possible Immediate Transitions:
 *   Current vertex may enter HORS_DOEUVRE
 ***************************************************************************/
// RPC entry point on a mirror: mark the local replica HUNGRY under the new
// lock generation, then start grabbing forks.
void rpc_make_philosopher_hungry(vertex_id_type gvid, bool newlockid) {
  lvid_type lvid = graph.local_vid(gvid);
  logstream(LOG_DEBUG) << rmi.procid() << ": Local HUNGRY Philosopher "
                       << gvid << std::endl;
  philosopherset[lvid].lock.lock();
  //ASSERT_EQ((int)philosopherset[lvid].state, (int)THINKING);
  philosopherset[lvid].state = HUNGRY;
  // ASSERT_NE(philosopherset[lvid].lockid, newlockid);
  philosopherset[lvid].lockid = newlockid;
  philosopherset[lvid].lock.unlock();
  local_philosopher_grabs_forks(lvid);
}

// Try to collect every fork adjacent to `p_id`. For each edge, the pair of
// philosophers is locked via try_acquire_edge_with_backoff; a fork held by
// the neighbor is requested and (if dirty) may change hands immediately via
// advance_fork_state_on_lock. If all forks are collected while still HUNGRY,
// the philosopher transitions to HORS_DOEUVRE and the ready signal is sent
// after all locks are released.
void local_philosopher_grabs_forks(lvid_type p_id) {
  philosopherset[p_id].lock.lock();
  local_vertex_type lvertex(graph.l_vertex(p_id));
  //philosopher is now hungry!
  // now try to get all the forks. lock one edge at a time
  // using the backoff strategy
  //std::cout << "vertex " << p_id << std::endl;
  //std::cout << "in edges: " << std::endl;
  foreach(local_edge_type edge, lvertex.in_edges()) {
    try_acquire_edge_with_backoff(edge.target().id(), edge.source().id());
    if (philosopherset[p_id].state == HUNGRY) {
      //std::cout << "\t" << graph.edge_id(edge) << ": " << edge.source() << "->" << edge.target() << std::endl;
      size_t edgeid = edge.id();
      // if fork is owned by other edge, try to take it
      if (fork_owner(edgeid) == OWNER_SOURCE) {
        request_for_fork(edgeid, OWNER_TARGET);
        advance_fork_state_on_lock(edgeid, edge.source().id(), edge.target().id());
      }
      philosopherset[edge.source().id()].lock.unlock();
    }
    else {
      // No longer HUNGRY (state advanced concurrently); stop scanning.
      philosopherset[edge.source().id()].lock.unlock();
      break;
    }
  }
  //std::cout << "out edges: " << std::endl;
  foreach(local_edge_type edge, lvertex.out_edges()) {
    //std::cout << "\t" << graph.edge_id(edge) << ": " << edge.source() << "->" << edge.target() << std::endl;
    try_acquire_edge_with_backoff(edge.source().id(), edge.target().id());
    if (philosopherset[p_id].state == HUNGRY) {
      size_t edgeid = edge.id();
      // if fork is owned by other edge, try to take it
      if (fork_owner(edgeid) == OWNER_TARGET) {
        request_for_fork(edgeid, OWNER_SOURCE);
        advance_fork_state_on_lock(edgeid, edge.source().id(), edge.target().id());
      }
      philosopherset[edge.target().id()].lock.unlock();
    }
    else {
      philosopherset[edge.target().id()].lock.unlock();
      break;
    }
  }
  bool enter_hors = false;
  if (philosopherset[p_id].state == HUNGRY &&
      philosopherset[p_id].forks_acquired == philosopherset[p_id].num_edges) {
    philosopherset[p_id].state = HORS_DOEUVRE;
    philosopherset[p_id].cancellation_sent = false;
    enter_hors = true;
  }
  philosopherset[p_id].lock.unlock();
  // Signal readiness only after dropping the philosopher's lock.
  if (enter_hors) enter_hors_doeuvre_unlocked(p_id);
}

/************************************************************************
 *
 * Called when a vertex may be ready to enter hors d'oeuvre
 * Locks must be maintained.
HORS_DOEUVRE must be set prior
 * to entering this function.
 ***********************************************************************/
// Notify the master of `p_id` that this replica is ready (HORS_DOEUVRE).
// If this machine IS the master, handle the signal locally; otherwise send
// an RPC (invoking hors_doeuvre_callback first, if one was registered).
void enter_hors_doeuvre_unlocked(lvid_type p_id) {
  // if I got all forks I can eat
  logstream(LOG_DEBUG) << rmi.procid() << ": Local HORS_DOEUVRE Philosopher "
                       << graph.global_vid(p_id) << std::endl;
  // signal the master
  local_vertex_type lvertex(graph.l_vertex(p_id));
  if (lvertex.owner() == rmi.procid()) {
    signal_ready_unlocked(p_id, philosopherset[p_id].lockid);
  }
  else {
    unsigned char pkey = rmi.dc().set_sequentialization_key(lvertex.global_id() % 254 + 1);
    if (hors_doeuvre_callback != NULL) hors_doeuvre_callback(p_id);
    rmi.remote_call(lvertex.owner(),
                    &dcm_type::rpc_signal_ready,
                    lvertex.global_id(),
                    philosopherset[p_id].lockid);
    rmi.dc().set_sequentialization_key(pkey);
  }
}

/************************************************************************
 *
 * Called when a vertex enters HORS_DOEUVRE. Locks must be maintained.
 *
 * Conditions:
 *   vertex must be in HUNGRY or HORS_DOEUVRE
 *   lock IDs must match
 *
 * Possible Immediate Transitions
 *   If counter == 0, transit to EATING
 ***********************************************************************/
// Master-side accounting of "ready" signals: decrement the outstanding-replica
// counter; once it reaches zero, broadcast EATING to all mirrors and enter
// EATING locally.
void signal_ready_unlocked(lvid_type lvid, bool lockid) {
  philosopherset[lvid].lock.lock();
  if(!(philosopherset[lvid].state == (int)HUNGRY ||
       philosopherset[lvid].state == (int)HORS_DOEUVRE)) {
    // Unexpected state: log loudly but continue (asserts are disabled).
    logstream(LOG_ERROR) << rmi.procid() << ": Bad signal ready state!!!! : "
                         << (int)philosopherset[lvid].state << std::endl;
    logstream(LOG_ERROR) << rmi.procid() << " Lock IDs : "
                         << (int)philosopherset[lvid].lockid << " "
                         << (int)lockid << std::endl;
    logstream(LOG_ERROR) << rmi.procid() << ": BAD Global HORS_DOEUVRE "
                         << graph.global_vid(lvid)
                         << "(" << (int)philosopherset[lvid].counter << ")"
                         << std::endl;
    /* ASSERT_TRUE(philosopherset[lvid].state == (int)HUNGRY ||
                   philosopherset[lvid].state == (int)HORS_DOEUVRE);*/
  }
  // ASSERT_EQ(philosopherset[lvid].lockid, lockid);
  philosopherset[lvid].counter--;
  logstream(LOG_DEBUG) << rmi.procid() << ": Global HORS_DOEUVRE "
                       << graph.global_vid(lvid)
                       << "(" << (int)philosopherset[lvid].counter << ")"
                       << " " << (int)(philosopherset[lvid].state) << std::endl;
  if(philosopherset[lvid].counter == 0) {
    philosopherset[lvid].lock.unlock();
    // broadcast EATING
    local_vertex_type lvertex(graph.l_vertex(lvid));
    unsigned char pkey = rmi.dc().set_sequentialization_key(lvertex.global_id() % 254 + 1);
    rmi.remote_call(lvertex.mirrors().begin(), lvertex.mirrors().end(),
                    &dcm_type::rpc_set_eating,
                    lvertex.global_id(),
                    lockid);
    set_eating(lvid, lockid);
    rmi.dc().set_sequentialization_key(pkey);
  }
  else {
    philosopherset[lvid].lock.unlock();
  }
}

// RPC entry point for a replica's ready signal (runs on the master).
void rpc_signal_ready(vertex_id_type gvid, bool lockid) {
  lvid_type lvid = graph.local_vid(gvid);
  signal_ready_unlocked(lvid, lockid);
}

// Transition `lvid` to EATING; on the owning machine this also fires the
// user callback, which is the point where the acquired lock is delivered.
void set_eating(lvid_type lvid, bool lockid) {
  philosopherset[lvid].lock.lock();
  logstream(LOG_DEBUG) << rmi.procid() << ": EATING "
                       << graph.global_vid(lvid)
                       << "(" << (int)philosopherset[lvid].counter << ")"
                       << std::endl;
  // ASSERT_EQ((int)philosopherset[lvid].state, (int)HORS_DOEUVRE);
  // ASSERT_EQ(philosopherset[lvid].lockid, lockid);
  philosopherset[lvid].state = EATING;
  philosopherset[lvid].cancellation_sent = false;
  philosopherset[lvid].lock.unlock();
  if (graph.l_vertex(lvid).owner() == rmi.procid()) {
    logstream(LOG_DEBUG) << rmi.procid() << ": CALLBACK "
                         << graph.global_vid(lvid) << std::endl;
    callback(lvid);
  }
}

void
rpc_set_eating(vertex_id_type gvid, bool lockid) {
  // RPC entry point: a mirror receives the master's EATING broadcast.
  logstream(LOG_DEBUG) << rmi.procid() << ": Receive Set EATING "
                       << gvid << std::endl;
  lvid_type lvid = graph.local_vid(gvid);
  set_eating(lvid, lockid);
}

/************************************************************************
 *
 * Called when a vertex stops eating
 *
 ***********************************************************************/
// On unlock, hand a dirty, requested fork to the other side (the inverse of
// advance_fork_state_on_lock). Returns true iff ownership changed. Caller
// must hold both philosophers' locks.
inline bool advance_fork_state_on_unlock(size_t forkid,
                                         lvid_type source,
                                         lvid_type target) {
  unsigned char currentowner = forkset[forkid] & OWNER_BIT;
  if (currentowner == OWNER_SOURCE) {
    // if the current owner is not eating, and the
    // fork is dirty and other side has placed a request
    if ((forkset[forkid] & DIRTY_BIT) && (forkset[forkid] & REQUEST_1)) {
      // change the owner and clean the fork)
      // keep my request bit if any
      clean_fork_count.inc();
      forkset[forkid] = OWNER_TARGET;
      philosopherset[source].forks_acquired--;
      philosopherset[target].forks_acquired++;
      return true;
    }
  }
  else {
    // if the current owner is not eating, and the
    // fork is dirty and other side has placed a request
    if ((forkset[forkid] & DIRTY_BIT) && (forkset[forkid] & REQUEST_0)) {
      // change the owner and clean the fork)
      // keep my request bit if any
      clean_fork_count.inc();
      forkset[forkid] = OWNER_SOURCE;
      philosopherset[source].forks_acquired++;
      philosopherset[target].forks_acquired--;
      return true;
    }
  }
  return false;
}

// Transition `p_id` EATING -> THINKING: dirty all adjacent forks, then give
// each requested fork to its neighbor. Neighbors that thereby collect their
// last fork are promoted to HORS_DOEUVRE and signalled at the end, outside
// any lock.
void local_philosopher_stops_eating(lvid_type p_id) {
  std::vector<lvid_type> retval;
  philosopherset[p_id].lock.lock();
  if (philosopherset[p_id].state != EATING) {
    std::cout << rmi.procid() << ": " << p_id
              << "FAILED!! Cannot Stop Eating!" << std::endl;
    // ASSERT_EQ((int)philosopherset[p_id].state, (int)EATING);
  }
  local_vertex_type lvertex(graph.l_vertex(p_id));
  // now forks are dirty
  foreach(local_edge_type edge, lvertex.in_edges()) {
    dirty_fork(edge.id());
  }
  foreach(local_edge_type edge, lvertex.out_edges()) {
    dirty_fork(edge.id());
  }
  philosopherset[p_id].state = THINKING;
  philosopherset[p_id].counter = 0;
  // now forks are dirty
  foreach(local_edge_type edge, lvertex.in_edges()) {
    try_acquire_edge_with_backoff(edge.target().id(), edge.source().id());
    lvid_type other = edge.source().id();
    if (philosopherset[p_id].state == THINKING) {
      size_t edgeid = edge.id();
      advance_fork_state_on_unlock(edgeid, edge.source().id(), edge.target().id());
      if (philosopherset[other].state == HUNGRY &&
          philosopherset[other].forks_acquired == philosopherset[other].num_edges) {
        philosopherset[other].state = HORS_DOEUVRE;
        philosopherset[other].cancellation_sent = false;
        // signal eating on other
        retval.push_back(other);
      }
      philosopherset[other].lock.unlock();
    }
    else {
      // Re-locked concurrently (no longer THINKING); stop scanning.
      philosopherset[other].lock.unlock();
      break;
    }
  }
  foreach(local_edge_type edge, lvertex.out_edges()) {
    try_acquire_edge_with_backoff(edge.source().id(), edge.target().id());
    lvid_type other = edge.target().id();
    if (philosopherset[p_id].state == THINKING) {
      size_t edgeid = edge.id();
      advance_fork_state_on_unlock(edgeid, edge.source().id(), edge.target().id());
      if (philosopherset[other].state == HUNGRY &&
          philosopherset[other].forks_acquired == philosopherset[other].num_edges) {
        philosopherset[other].state = HORS_DOEUVRE;
        philosopherset[other].cancellation_sent = false;
        // signal eating on other
        retval.push_back(other);
      }
      philosopherset[other].lock.unlock();
    }
    else {
      philosopherset[other].lock.unlock();
      break;
    }
  }
  philosopherset[p_id].lock.unlock();
  // Signal promoted philosophers with no locks held.
  foreach(lvid_type lvid, retval) {
    enter_hors_doeuvre_unlocked(lvid);
  }
}

// RPC entry point: master told this mirror to stop eating.
void rpc_philosopher_stops_eating(vertex_id_type gvid) {
  logstream(LOG_DEBUG) << rmi.procid() << ": Receive STOP eating on "
                       << gvid << std::endl;
  local_philosopher_stops_eating(graph.local_vid(gvid));
}

public:

// Construct the lock manager over `graph`. `callback(lvid)` fires when the
// lock on lvid is acquired (philosopher starts EATING on the owner);
// `hors_doeuvre_callback`, if provided, fires on a mirror just before it
// signals readiness to the master. Performs a barrier so all machines finish
// the initial fork arrangement before any lock requests are issued.
inline distributed_chandy_misra(distributed_control &dc,
                                GraphType &graph,
                                boost::function<void(lvid_type)> callback,
                                boost::function<void(lvid_type)> hors_doeuvre_callback = NULL) :
    rmi(dc, this),
    graph(graph),
    callback(callback),
    hors_doeuvre_callback(hors_doeuvre_callback) {
  forkset.resize(graph.num_local_edges(), 0);
  philosopherset.resize(graph.num_local_vertices());
  compute_initial_fork_arrangement();
  rmi.barrier();
}

// Number of fork handovers performed so far (clean transfers).
size_t num_clean_forks() const {
  return clean_fork_count.value;
}

// Master-side bookkeeping for a new lock attempt: the counter is the number
// of replicas (mirrors + self) that must signal ready before EATING.
// Caller must hold philosopherset[p_id].lock.
void initialize_master_philosopher_as_hungry_locked(lvid_type p_id,
                                                    bool lockid) {
  philosopherset[p_id].lockid = lockid;
  philosopherset[p_id].state = HUNGRY;
  philosopherset[p_id].counter = graph.l_vertex(p_id).num_mirrors() + 1;
}

// Public API: begin acquiring the lock on master vertex `p_id`. Flips the
// lock generation, broadcasts HUNGRY to all mirrors, then grabs local forks.
void make_philosopher_hungry(lvid_type p_id) {
  local_vertex_type lvertex(graph.l_vertex(p_id));
  // ASSERT_EQ(rec.get_owner(), rmi.procid());
  philosopherset[p_id].lock.lock();
  // ASSERT_EQ((int)philosopherset[p_id].state, (int)THINKING);
  bool newlockid = !philosopherset[p_id].lockid;
  initialize_master_philosopher_as_hungry_locked(p_id, newlockid);
  logstream(LOG_DEBUG) << rmi.procid() << ": Global HUNGRY "
                       << lvertex.global_id()
                       << "(" << (int)philosopherset[p_id].counter << ")"
                       << std::endl;
  philosopherset[p_id].lock.unlock();
  unsigned char pkey = rmi.dc().set_sequentialization_key(lvertex.global_id() % 254 + 1);
  rmi.remote_call(lvertex.mirrors().begin(), lvertex.mirrors().end(),
                  &dcm_type::rpc_make_philosopher_hungry,
                  lvertex.global_id(),
                  newlockid);
  rmi.dc().set_sequentialization_key(pkey);
  local_philosopher_grabs_forks(p_id);
}

// Variant called independently on every replica (no broadcast): each machine
// marks its own replica HUNGRY and grabs forks locally.
void make_philosopher_hungry_per_replica(lvid_type p_id) {
  local_vertex_type lvertex(graph.l_vertex(p_id));
  philosopherset[p_id].lock.lock();
  // ASSERT_EQ((int)philosopherset[p_id].state, (int)THINKING);
  if (lvertex.owner() == rmi.procid()) {
    bool newlockid = !philosopherset[p_id].lockid;
    initialize_master_philosopher_as_hungry_locked(p_id, newlockid);
    logstream(LOG_DEBUG) << rmi.procid() << ": Global HUNGRY "
                         << lvertex.global_id()
                         << "(" << (int)philosopherset[p_id].counter << ")"
                         << std::endl;
  }
  else {
    bool newlockid = !philosopherset[p_id].lockid;
    philosopherset[p_id].lockid = newlockid;
    philosopherset[p_id].state = HUNGRY;
  }
  philosopherset[p_id].lock.unlock();
  local_philosopher_grabs_forks(p_id);
}

// Public API: release the lock on master vertex `p_id`. Broadcasts the stop
// to all mirrors and releases local forks.
void philosopher_stops_eating(lvid_type p_id) {
  local_vertex_type lvertex(graph.l_vertex(p_id));
  logstream(LOG_DEBUG) << rmi.procid() << ": Global STOP Eating "
                       << lvertex.global_id() << std::endl;
  philosopherset[p_id].lock.lock();
  // ASSERT_EQ(philosopherset[p_id].state, (int)EATING);
  philosopherset[p_id].counter = 0;
  philosopherset[p_id].lock.unlock();
  unsigned char pkey = rmi.dc().set_sequentialization_key(lvertex.global_id() % 254 + 1);
  rmi.remote_call(lvertex.mirrors().begin(), lvertex.mirrors().end(),
                  &dcm_type::rpc_philosopher_stops_eating,
                  lvertex.global_id());
  rmi.dc().set_sequentialization_key(pkey);
  local_philosopher_stops_eating(p_id);
}

// Per-replica variant of the release (no broadcast).
void philosopher_stops_eating_per_replica(lvid_type p_id) {
  logstream(LOG_DEBUG) << rmi.procid() << ": Global STOP Eating "
                       << graph.global_vid(p_id) << std::endl;
  // ASSERT_EQ(philosopherset[p_id].state, (int)EATING);
  local_philosopher_stops_eating(p_id);
}

// Debug check for the quiescent state: every fork dirty, everyone THINKING.
void no_locks_consistency_check() {
  // make sure all forks are dirty
  for (size_t i = 0;i < forkset.size(); ++i) ASSERT_TRUE(fork_dirty(i));
  // all philosophers are THINKING
  for (size_t i = 0;i < philosopherset.size(); ++i) {
    ASSERT_TRUE(philosopherset[i].state == THINKING);
  }
}

// Debug dump of per-philosopher state, after verifying edge-id uniqueness
// and running the full consistency check.
void print_out() {
  boost::unordered_set<size_t> eidset1;
  boost::unordered_set<size_t> eidset2;
  for (lvid_type v = 0; v < graph.num_local_vertices(); ++v) {
    local_vertex_type lvertex(graph.l_vertex(v));
    foreach(local_edge_type edge, lvertex.in_edges()) {
      size_t edgeid = edge.id();
      ASSERT_TRUE(eidset1.find(edgeid) == eidset1.end());
      eidset1.insert(edgeid);
    }
    foreach(local_edge_type edge, lvertex.out_edges()) {
      size_t edgeid = edge.id();
      ASSERT_TRUE(eidset2.find(edgeid) == eidset2.end());
      eidset2.insert(edgeid);
    }
  }
  ASSERT_EQ(eidset1.size(), eidset2.size());
  eidset1.clear();
  eidset2.clear();
  complete_consistency_check();
  std::cout << "Philosophers\n";
  std::cout << "------------\n";
  for (lvid_type v = 0; v < graph.num_local_vertices(); ++v) {
    local_vertex_type lvertex(graph.l_vertex(v));
    std::cout << graph.global_vid(v) << ": "
              << (int)philosopherset[v].state << " "
              << philosopherset[v].forks_acquired << " "
              << philosopherset[v].num_edges << " ";
    if (philosopherset[v].forks_acquired == philosopherset[v].num_edges) {
      std::cout << "---------------!";
    }
    std::cout << "\n";
    std::cout << "\tin: ";
    foreach(local_edge_type edge, lvertex.in_edges()) {
      size_t edgeid = edge.id();
      // NOTE(review): fork_dirty is given the fork's state byte
      // (forkset[edgeid]) rather than the edge id here and below; elsewhere
      // in this class it is always called with the id. Debug output only —
      // confirm which was intended.
      if (fork_dirty(forkset[edgeid])) {
        std::cout << edgeid << ":" << (int)forkset[edgeid] << " ";
      }
    }
    std::cout << "\n\tout: ";
    foreach(local_edge_type edge, lvertex.out_edges()) {
      size_t edgeid = edge.id();
      if (fork_dirty(forkset[edgeid])) {
        std::cout << edgeid << ":" << (int)forkset[edgeid] << " ";
      }
    }
    std::cout << "\n";
  }
}

// Debug invariant check: forks_acquired matches actual ownership, THINKING
// philosophers hold no clean forks, HUNGRY philosophers are missing at least
// one fork (and any missing fork is clean unless its holder is EATING), and
// EATING philosophers hold all their forks.
void complete_consistency_check() {
  for (lvid_type v = 0; v < graph.num_local_vertices(); ++v) {
    local_vertex_type lvertex(graph.l_vertex(v));
    // count the number of forks I own
    size_t numowned = 0;
    size_t numowned_clean = 0;
    foreach(local_edge_type edge, lvertex.in_edges()) {
      size_t edgeid = edge.id();
      if (fork_owner(edgeid) == OWNER_TARGET) {
        numowned++;
        if (!fork_dirty(edgeid)) numowned_clean++;
      }
    }
    foreach(local_edge_type edge, lvertex.out_edges()) {
      size_t edgeid = edge.id();
      if (fork_owner(edgeid) == OWNER_SOURCE) {
        numowned++;
        if (!fork_dirty(edgeid)) numowned_clean++;
      }
    }
    ASSERT_EQ(philosopherset[v].forks_acquired, numowned);
    if (philosopherset[v].state == THINKING) {
      ASSERT_EQ(numowned_clean, 0);
    }
    else if (philosopherset[v].state == HUNGRY) {
      ASSERT_NE(philosopherset[v].num_edges, philosopherset[v].forks_acquired);
      // any fork I am unable to acquire. Must be clean, and the other person
      // must be eating or hungry
      foreach(local_edge_type edge, lvertex.in_edges()) {
        size_t edgeid = edge.id();
        // not owned
        if (fork_owner(edgeid) == OWNER_SOURCE) {
          if (philosopherset[edge.source().id()].state != EATING) {
            if (fork_dirty(edgeid)) {
              std::cout << (int)(forkset[edgeid]) << " "
                        << (int)philosopherset[edge.source().id()].state << "->"
                        << (int)philosopherset[edge.target().id()].state
                        << std::endl;
              ASSERT_FALSE(fork_dirty(edgeid));
            }
          }
          ASSERT_NE(philosopherset[edge.source().id()].state, (int)THINKING);
        }
      }
      foreach(local_edge_type edge, lvertex.out_edges()) {
        size_t edgeid = edge.id();
        if (fork_owner(edgeid) == OWNER_TARGET) {
          if (philosopherset[edge.target().id()].state != EATING) {
            if (fork_dirty(edgeid)) {
              std::cout << (int)(forkset[edgeid]) << " "
                        << (int)philosopherset[edge.source().id()].state << "->"
                        << (int)philosopherset[edge.target().id()].state
                        << std::endl;
              ASSERT_FALSE(fork_dirty(edgeid));
            }
          }
          ASSERT_NE(philosopherset[edge.target().id()].state, (int)THINKING);
        }
      }
    }
    else if (philosopherset[v].state == EATING) {
      ASSERT_EQ(philosopherset[v].forks_acquired, philosopherset[v].num_edges);
    }
  }
}
};
}
#include <graphlab/macros_undef.hpp>
#endif



================================================
FILE: src/graphlab/engine/engine_includes.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.  See the License for the specific language
 *  governing permissions and limitations under the License.
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <graphlab/engine/iengine.hpp> #include <graphlab/engine/synchronous_engine.hpp> #include <graphlab/engine/async_consistent_engine.hpp> #include <graphlab/engine/omni_engine.hpp> #include <graphlab/engine/execution_status.hpp> //#include <graphlab/engine/asynchronous_engine.hpp> //#include <graphlab/engine/engine_factory.hpp> //#include <graphlab/engine/engine_options.hpp> ================================================ FILE: src/graphlab/engine/execution_status.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_EXECUTION_STATUS_HPP #define GRAPHLAB_EXECUTION_STATUS_HPP namespace graphlab { /** * \brief the reasons for execution completion. * * Because there are several reasons why the graphlab engine might * terminate the exec_status value is returned from the start * function after completing execution. 
* */ struct execution_status { enum status_enum { UNSET, /** The default termination reason */ RUNNING, /** The engine is currently running */ TASK_DEPLETION, /**<Execution completed successfully due to task depletion */ TIMEOUT, /**< The execution completed after timing out */ FORCED_ABORT, /**< the engine was stopped by calling force abort */ EXCEPTION /**< the engine was stopped by an exception */ }; // end of enum // Convenience function. static std::string to_string(status_enum es) { switch(es) { case UNSET: return "engine not run!"; case RUNNING: return "engine is still running!"; case TASK_DEPLETION: return "task depletion (natural)"; case TIMEOUT: return "timeout"; case FORCED_ABORT: return "forced abort"; case EXCEPTION: return "exception"; default: return "unknown"; }; } // end of to_string }; }; // end of namespace graphlab #endif ================================================ FILE: src/graphlab/engine/iengine.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * Also contains code that is Copyright 2011 Yahoo! Inc. All rights * reserved. 
* * Contributed under the iCLA for: * Joseph Gonzalez (jegonzal@yahoo-inc.com) * */ #ifndef GRAPHLAB_IENGINE_HPP #define GRAPHLAB_IENGINE_HPP #include <boost/bind.hpp> #include <boost/functional.hpp> #include <graphlab/vertex_program/icontext.hpp> #include <graphlab/engine/execution_status.hpp> #include <graphlab/options/graphlab_options.hpp> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/aggregation/distributed_aggregator.hpp> #include <graphlab/vertex_program/op_plus_eq_concept.hpp> #include <graphlab/graph/vertex_set.hpp> #if defined(__cplusplus) && __cplusplus >= 201103L // for whatever reason boost concept is broken under C++11. // Temporary workaround. TOFIX #undef BOOST_CONCEPT_ASSERT #define BOOST_CONCEPT_ASSERT(unused) #endif namespace graphlab { /** * \ingroup engine * * \brief The abstract interface of a GraphLab engine. * * A GraphLab engine is responsible for executing vertex programs in * parallel on one or more machines. GraphLab has a collection of * different engines with different guarantees on how * vertex-programs are executed. However each engine must implement * the iengine interface to allow them to be used "interchangeably." * * In addition to executing vertex programs GraphLab engines also * expose a synchronous aggregation framework. This allows users to * attach "map-reduce" style jobs that are run periodically on all * edges or vertices while GraphLab programs are actively running. 
* * Example Usage * ================= * * One can use the iengine interface to select between different * engines at runtime: * * \code * iengine<pagerank>* engine_ptr = NULL; * if(cmdline_arg == "synchronous") { * engine_ptr = new synchronous_engine<pagerank>(dc, graph, cmdopts); * } else { * engine_ptr = new async_consistent_engine<pagerank>(dc, graph, cmdopts); * } * // Attach an aggregator * engine_ptr->add_edge_aggregator<float>("edge_map", * edge_map_fun, finalize_fun); * // Make it run every 3 seconds * engine_ptr->aggregate_periodic("edge_map"); * // Signal all vertices * engine_ptr->signal_all(); * // Run the engine * engine_ptr->start(); * // do something interesting * delete engine_ptr; engine_ptr = NULL; * \endcode * * @tparam VertexProgram The user defined vertex program which should extend the * \ref ivertex_program interface. */ template<typename VertexProgram> class iengine { public: /** * \brief The user defined vertex program type which should extend * ivertex_program. */ typedef VertexProgram vertex_program_type; /** * \cond GRAPHLAB_INTERNAL * \brief GraphLab Requires that vertex programs be default * constructible. * * \code * class vertex_program { * public: * vertex_program() { } * }; * \endcode */ BOOST_CONCEPT_ASSERT((boost::DefaultConstructible<vertex_program_type>)); /// \endcond /** * \cond GRAPHLAB_INTERNAL * * \brief GraphLab requires that the vertex programx type be * Serializable. See \ref sec_serializable for detials. */ BOOST_CONCEPT_ASSERT((graphlab::Serializable<vertex_program_type>)); /// \endcond /** * \brief The user defined message type which is defined in * ivertex_program::message_type. * */ typedef typename vertex_program_type::message_type message_type; /** * \brief The graph type which is defined in * ivertex_program::graph_type and will typically be * \ref distributed_graph. */ typedef typename vertex_program_type::graph_type graph_type; /** * \brief The vertex identifier type defined in * \ref graphlab::vertex_id_type. 
*/ typedef typename graph_type::vertex_id_type vertex_id_type; /** * \brief the vertex object type which contains a reference to the * vertex data and is defined in the iengine::graph_type * (see for example \ref distributed_graph::vertex_type). */ typedef typename graph_type::vertex_type vertex_type; /** * \brief the edge object type which contains a reference to the * edge data and is defined in the iengine::graph_type (see for * example \ref distributed_graph::edge_type). */ typedef typename graph_type::edge_type edge_type; /** * \brief The context type which is passed into vertex programs as * a callback to the engine. * * Most engines use the \ref graphlab::context implementation. */ typedef typename vertex_program_type::icontext_type icontext_type; /** * \brief The type of the distributed aggregator used by each engine to * implement distributed aggregation. */ typedef distributed_aggregator<graph_type, icontext_type> aggregator_type; /** * \internal * \brief Virtual destructor required for inheritance */ virtual ~iengine() {}; /** * \brief Start the engine execution. * * Behavior details depend on the engine implementation. See the * implementation documentation for specifics. * * @return the reason for termination */ virtual execution_status::status_enum start() = 0; /** * \brief Compute the total number of updates (calls to apply) * executed since start was last invoked. * * \return Total number of updates */ virtual size_t num_updates() const = 0; /** * \brief Get the elapsed time in seconds since start was last * called. * * \return elapsed time in seconds */ virtual float elapsed_seconds() const = 0; /** * \brief get the current iteration number. This is not defined * for all engines in which case -1 is returned. * * \return the current iteration or -1 if not supported. */ virtual int iteration() const { return -1; } /** * \brief Signals single a vertex with an optional message. 
     *
     * This function sends a message to a particular vertex which will
     * receive that message on start. The signal function must be
     * invoked on all machines simultaneously. For example:
     *
     * \code
     * graphlab::synchronous_engine<vprog> engine(dc, graph, opts);
     * engine.signal(0); // signal vertex zero
     * \endcode
     *
     * and _not_:
     *
     * \code
     * graphlab::synchronous_engine<vprog> engine(dc, graph, opts);
     * if(dc.procid() == 0) engine.signal(0); // signal vertex zero
     * \endcode
     *
     * Since signal is executed synchronously on all machines it
     * should only be used to schedule a small set of vertices. The
     * preferred method to signal a large set of vertices (e.g., all
     * vertices that are a certain type) is to use either the vertex
     * program init function or the aggregation framework. For
     * example to signal all vertices that have a particular value one
     * could write:
     *
     * \code
     * struct bipartite_opt :
     *   public graphlab::ivertex_program<graph_type, gather_type> {
     *   // The user defined init function
     *   void init(icontext_type& context, vertex_type& vertex) {
     *     // Signal myself if I am a certain type
     *     if(vertex.data().on_left) context.signal(vertex);
     *   }
     *   // other vastly more interesting code
     * };
     * \endcode
     *
     * @param [in] vertex the vertex id to signal
     * @param [in] message the message to send to that vertex. The
     * default message is sent if no message is provided.
     * (See ivertex_program::message_type for details about the
     * message_type).
     */
    virtual void signal(vertex_id_type vertex,
                        const message_type& message = message_type()) = 0;

    /**
     * \brief Signal all vertices with a particular message.
     *
     * This function sends the same message to all vertices which will
     * receive that message on start. The signal_all function must be
     * invoked on all machines simultaneously. For example:
     *
     * \code
     * graphlab::synchronous_engine<vprog> engine(dc, graph, opts);
     * engine.signal_all(); // signal all vertices
     * \endcode
     *
     * and _not_:
     *
     * \code
     * graphlab::synchronous_engine<vprog> engine(dc, graph, opts);
     * if(dc.procid() == 0) engine.signal_all(); // signal all vertices
     * \endcode
     *
     * The signal_all function is the most common way to send messages
     * to the engine. For example in the pagerank application we want
     * all vertices to be active on the first round. Therefore we
     * would write:
     *
     * \code
     * graphlab::synchronous_engine<pagerank> engine(dc, graph, opts);
     * engine.signal_all();
     * engine.start();
     * \endcode
     *
     * @param [in] message the message to send to all vertices. The
     * default message is sent if no message is provided
     * (See ivertex_program::message_type for details about the
     * message_type).
     */
    virtual void signal_all(const message_type& message = message_type(),
                            const std::string& order = "shuffle") = 0;

    /**
     * \brief Signal a set of vertices with a particular message.
     *
     * This function sends the same message to a set of vertices which will
     * receive that message on start. The signal_vset function must be
     * invoked on all machines simultaneously. For example:
     *
     * \code
     * graphlab::synchronous_engine<vprog> engine(dc, graph, opts);
     * engine.signal_vset(vset); // signal a subset of vertices
     * \endcode
     *
     * signal_all() is conceptually equivalent to:
     *
     * \code
     * engine.signal_vset(graph.complete_set());
     * \endcode
     *
     * @param [in] vset The set of vertices to signal
     * @param [in] message the message to send to all vertices. The
     * default message is sent if no message is provided
     * (See ivertex_program::message_type for details about the
     * message_type).
     */
    virtual void signal_vset(const vertex_set& vset,
                             const message_type& message = message_type(),
                             const std::string& order = "shuffle") = 0;

    /**
     * \brief Creates a vertex aggregator. Returns true on success.
     *        Returns false if an aggregator of the same name already
     *        exists.
 *
 * Creates a vertex aggregator associated to a particular key.
 * The map_function is called over every vertex in the graph, and the
 * return value of the map is summed. The finalize_function is then called
 * on the result of the reduction. The finalize_function is called on
 * all machines. The map_function should only read the graph data,
 * and should not make any modifications.
 *
 * ### Basic Usage
 * For instance, if the graph has float vertex data, and float edge data:
 * \code
 * typedef graphlab::distributed_graph<float, float> graph_type;
 * \endcode
 *
 * An aggregator can be constructed to compute the absolute sum of all the
 * vertex data. To do this, we define two functions.
 * \code
 * float absolute_vertex_data(engine_type::icontext_type& context,
 *                            graph_type::vertex_type vertex) {
 *   return std::fabs(vertex.data());
 * }
 *
 * void print_finalize(engine_type::icontext_type& context,
 *                     float total) {
 *   std::cout << total << "\n";
 * }
 * \endcode
 *
 * Next, we define the aggregator in the engine by calling
 * add_vertex_aggregator(). We must assign it a unique
 * name which will be used to reference this particular aggregate
 * operation. We shall call it "absolute_vertex_sum".
 * \code
 * engine.add_vertex_aggregator<float>("absolute_vertex_sum",
 *                                     absolute_vertex_data,
 *                                     print_finalize);
 * \endcode
 *
 * When executed, the engine executes <code>absolute_vertex_data()</code>
 * on each vertex in the graph. <code>absolute_vertex_data()</code>
 * reads the vertex data, and returns its absolute value. All return
 * values are then summed together using the float's += operator.
 * The final result is then passed to the <code>print_finalize</code>
 * function. The template argument <code><float></code> is necessary to
 * provide information about the return type of
 * <code>absolute_vertex_data</code>.
 *
 *
 * This aggregator can be run immediately by calling
 * aggregate_now() with the name of the aggregator.
 * \code
 * engine.aggregate_now("absolute_vertex_sum");
 * \endcode
 *
 * Or can be arranged to run periodically together with the engine
 * execution (in this example, every 1.5 seconds).
 * \code
 * engine.aggregate_periodic("absolute_vertex_sum", 1.5);
 * \endcode
 *
 * Note that since finalize is called on <b>all machines</b>, multiple
 * copies of the total will be printed. If only one copy is desired,
 * see \ref graphlab::icontext::cout() "context.cout()" or to get
 * the actual process ID using
 * \ref graphlab::icontext::procid() "context.procid()"
 *
 * In practice, the reduction type can be any arbitrary user-defined type
 * as long as a += operator is defined. This permits great flexibility
 * in the type of operations the aggregator can perform.
 *
 * ### Details
 * The add_vertex_aggregator() function is also templatized over both
 * function types and there is no strong enforcement of the exact argument
 * types of the map function and the reduce function. For instance, in the
 * above example, the following print_finalize() variants may also be
 * accepted.
 *
 * \code
 * void print_finalize(engine_type::icontext_type& context, double total) {
 *   std::cout << total << "\n";
 * }
 *
 * void print_finalize(engine_type::icontext_type& context, float& total) {
 *   std::cout << total << "\n";
 * }
 *
 * void print_finalize(engine_type::icontext_type& context, const float& total) {
 *   std::cout << total << "\n";
 * }
 * \endcode
 * In particular, the last variation may be useful for performance reasons
 * if the reduction type is large.
 *
 * ### Distributed Behavior
 * To obtain consistent distributed behavior in the distributed setting,
 * we designed the aggregator to minimize the amount of asymmetry among
 * the machines. In particular, the finalize operation is guaranteed to be
 * called on all machines. This therefore permits global variables to be
 * modified on finalize since all machines are ensured to be eventually
 * consistent.
     *
     * For instance, in the above example, print_finalize could
     * store the result in a global variable:
     * \code
     * void print_finalize(engine_type::icontext_type& context, float total) {
     *   GLOBAL_TOTAL = total;
     * }
     * \endcode
     * which will make it accessible to all other running update functions.
     *
     * \tparam ReductionType The output of the map function. Must have
     *                       operator+= defined, and must be \ref sec_serializable.
     * \tparam VertexMapType The type of the map function.
     *                       Not generally needed.
     *                       Can be inferred by the compiler.
     * \tparam FinalizerType The type of the finalize function.
     *                       Not generally needed.
     *                       Can be inferred by the compiler.
     *
     * \param [in] key The name of this aggregator. Must be unique.
     * \param [in] map_function The Map function to use. Must take an
     *                          \ref icontext_type& as its first argument, and
     *                          a \ref vertex_type, or a reference to a
     *                          \ref vertex_type as its second argument.
     *                          Returns a ReductionType which must be summable
     *                          and \ref sec_serializable .
     * \param [in] finalize_function The Finalize function to use. Must take
     *                               an \ref icontext_type& as its first
     *                               argument and a ReductionType, or a
     *                               reference to a ReductionType as its second
     *                               argument.
     */
    template <typename ReductionType,
              typename VertexMapType,
              typename FinalizerType>
    bool add_vertex_aggregator(const std::string& key,
                               VertexMapType map_function,
                               FinalizerType finalize_function) {
      BOOST_CONCEPT_ASSERT((graphlab::Serializable<ReductionType>));
      BOOST_CONCEPT_ASSERT((graphlab::OpPlusEq<ReductionType>));
      aggregator_type* aggregator = get_aggregator();
      if(aggregator == NULL) {
        logstream(LOG_FATAL)
            << "Aggregation not supported by this engine!"
            << std::endl;
        return false; // does not return
      }
      return aggregator->template add_vertex_aggregator<ReductionType>(key,
                                                                       map_function,
                                                                       finalize_function);
    } // end of add vertex aggregator

#if defined(__cplusplus) && __cplusplus >= 201103L
    /**
     * \brief An overload of add_vertex_aggregator for C++11 which does not
     *        require the user to provide the reduction type.
     *
     * This function is available only if the compiler has C++11 support.
     * Specifically, it uses C++11's decltype operation to infer the
     * reduction type, thus eliminating the need for the function
     * call to be templatized over the reduction type. For instance,
     * in the add_vertex_aggregator() example, it allows the following
     * code to be written:
     * \code
     * engine.add_vertex_aggregator("absolute_vertex_sum",
     *                              absolute_vertex_data,
     *                              print_finalize);
     * \endcode
     *
     * \tparam VertexMapType The type of the map function.
     *                       Not generally needed.
     *                       Can be inferred by the compiler.
     * \tparam FinalizerType The type of the finalize function.
     *                       Not generally needed.
     *                       Can be inferred by the compiler.
     *
     * \param [in] key The name of this aggregator. Must be unique.
     * \param [in] map_function The Map function to use. Must take an
     *                          \ref icontext_type& as its first argument, and
     *                          a \ref vertex_type, or a reference to a
     *                          \ref vertex_type as its second argument.
     *                          Returns a ReductionType which must be summable
     *                          and \ref sec_serializable .
     * \param [in] finalize_function The Finalize function to use. Must take
     *                               an \ref icontext_type& as its first
     *                               argument and a ReductionType, or a
     *                               reference to a ReductionType as its second
     *                               argument.
     */
    template <typename VertexMapType, typename FinalizerType>
    bool add_vertex_aggregator(const std::string& key,
                               VertexMapType map_function,
                               FinalizerType finalize_function) {
      aggregator_type* aggregator = get_aggregator();
      if(aggregator == NULL) {
        logstream(LOG_FATAL)
            << "Aggregation not supported by this engine!"
            << std::endl;
        return false; // does not return
      }
      return aggregator->add_vertex_aggregator(key,
                                               map_function,
                                               finalize_function);
    } // end of add vertex aggregator
#endif

    /**
     * \brief Creates an edge aggregator. Returns true on success.
     *        Returns false if an aggregator of the same name already
     *        exists.
     *
     * Creates an edge aggregator associated to a particular key.
     * The map_function is called over every edge in the graph, and the
     * return value of the map is summed. The finalize_function is then called
     * on the result of the reduction. The finalize_function is called on
     * all machines. The map_function should only read the graph data,
     * and should not make any modifications.
     *
     * ### Basic Usage
     * For instance, if the graph has float vertex data, and float edge data:
     * \code
     * typedef graphlab::distributed_graph<float, float> graph_type;
     * \endcode
     *
     * An aggregator can be constructed to compute the absolute sum of all the
     * edge data. To do this, we define two functions.
     * \code
     * float absolute_edge_data(engine_type::icontext_type& context,
     *                          graph_type::edge_type edge) {
     *   return std::fabs(edge.data());
     * }
     *
     * void print_finalize(engine_type::icontext_type& context, float total) {
     *   std::cout << total << "\n";
     * }
     * \endcode
     *
     * Next, we define the aggregator in the engine by calling
     * add_edge_aggregator(). We must assign it a unique
     * name which will be used to reference this particular aggregate
     * operation. We shall call it "absolute_edge_sum".
     * \code
     * engine.add_edge_aggregator<float>("absolute_edge_sum",
     *                                   absolute_edge_data,
     *                                   print_finalize);
     * \endcode
     *
     *
     * When executed, the engine executes <code>absolute_edge_data()</code>
     * on each edge in the graph. <code>absolute_edge_data()</code>
     * reads the edge data, and returns its absolute value. All return
     * values are then summed together using the float's += operator.
     * The final result is then passed to the <code>print_finalize</code>
     * function.
The template argument <code><float></code> is necessary to
 * provide information about the return type of
 * <code>absolute_edge_data</code>.
 *
 *
 * This aggregator can be run immediately by calling
 * aggregate_now() with the name of the aggregator.
 * \code
 * engine.aggregate_now("absolute_edge_sum");
 * \endcode
 *
 * Or can be arranged to run periodically together with the engine
 * execution (in this example, every 1.5 seconds).
 * \code
 * engine.aggregate_periodic("absolute_edge_sum", 1.5);
 * \endcode
 *
 * Note that since finalize is called on <b>all machines</b>, multiple
 * copies of the total will be printed. If only one copy is desired,
 * see \ref graphlab::icontext::cout() "context.cout()" or to get
 * the actual process ID using
 * \ref graphlab::icontext::procid() "context.procid()"
 *
 * ### Details
 * The add_edge_aggregator() function is also templatized over both
 * function types and there is no strong enforcement of the exact argument
 * types of the map function and the reduce function. For instance, in the
 * above example, the following print_finalize() variants may also be
 * accepted.
 *
 * \code
 * void print_finalize(engine_type::icontext_type& context, double total) {
 *   std::cout << total << "\n";
 * }
 *
 * void print_finalize(engine_type::icontext_type& context, float& total) {
 *   std::cout << total << "\n";
 * }
 *
 * void print_finalize(engine_type::icontext_type& context, const float& total) {
 *   std::cout << total << "\n";
 * }
 * \endcode
 * In particular, the last variation may be useful for performance reasons
 * if the reduction type is large.
 *
 * ### Distributed Behavior
 * To obtain consistent distributed behavior in the distributed setting,
 * we designed the aggregator to minimize the amount of asymmetry among
 * the machines. In particular, the finalize operation is guaranteed to be
 * called on all machines. This therefore permits global variables to be
 * modified on finalize since all machines are ensured to be eventually
 * consistent.
     *
     * For instance, in the above example, print_finalize could
     * store the result in a global variable:
     * \code
     * void print_finalize(engine_type::icontext_type& context, float total) {
     *   GLOBAL_TOTAL = total;
     * }
     * \endcode
     * which will make it accessible to all other running update functions.
     *
     * \tparam ReductionType The output of the map function. Must have
     *                       operator+= defined, and must be \ref sec_serializable.
     * \tparam EdgeMapType The type of the map function.
     *                     Not generally needed.
     *                     Can be inferred by the compiler.
     * \tparam FinalizerType The type of the finalize function.
     *                       Not generally needed.
     *                       Can be inferred by the compiler.
     *
     * \param [in] key The name of this aggregator. Must be unique.
     * \param [in] map_function The Map function to use. Must take an
     *                          \ref icontext_type& as its first argument, and
     *                          a \ref edge_type, or a reference to a
     *                          \ref edge_type as its second argument.
     *                          Returns a ReductionType which must be summable
     *                          and \ref sec_serializable .
     * \param [in] finalize_function The Finalize function to use. Must take
     *                               an \ref icontext_type& as its first
     *                               argument and a ReductionType, or a
     *                               reference to a ReductionType as its second
     *                               argument.
     */
    template <typename ReductionType,
              typename EdgeMapType,
              typename FinalizerType>
    bool add_edge_aggregator(const std::string& key,
                             EdgeMapType map_function,
                             FinalizerType finalize_function) {
      BOOST_CONCEPT_ASSERT((graphlab::Serializable<ReductionType>));
      BOOST_CONCEPT_ASSERT((graphlab::OpPlusEq<ReductionType>));
      aggregator_type* aggregator = get_aggregator();
      if(aggregator == NULL) {
        logstream(LOG_FATAL)
            << "Aggregation not supported by this engine!"
            << std::endl;
        return false; // does not return
      }
      return aggregator->template add_edge_aggregator<ReductionType>
          (key, map_function, finalize_function);
    } // end of add edge aggregator

#if defined(__cplusplus) && __cplusplus >= 201103L
    /**
     * \brief An overload of add_edge_aggregator for C++11 which does not
     *        require the user to provide the reduction type.
     *
     * This function is available only if the compiler has C++11 support.
     * Specifically, it uses C++11's decltype operation to infer the
     * reduction type, thus eliminating the need for the function
     * call to be templatized over the reduction type. For instance,
     * in the add_edge_aggregator() example, it allows the following
     * code to be written:
     * \code
     * engine.add_edge_aggregator("absolute_edge_sum",
     *                            absolute_edge_data,
     *                            print_finalize);
     * \endcode
     *
     * \tparam EdgeMapType The type of the map function.
     *                     Not generally needed.
     *                     Can be inferred by the compiler.
     * \tparam FinalizerType The type of the finalize function.
     *                       Not generally needed.
     *                       Can be inferred by the compiler.
     *
     * \param [in] key The name of this aggregator. Must be unique.
     * \param [in] map_function The Map function to use. Must take an
     *                          \ref icontext_type& as its first argument, and
     *                          a \ref edge_type, or a reference to a
     *                          \ref edge_type as its second argument.
     *                          Returns a ReductionType which must be summable
     *                          and \ref sec_serializable .
     * \param [in] finalize_function The Finalize function to use. Must take
     *                               an \ref icontext_type& as its first
     *                               argument and a ReductionType, or a
     *                               reference to a ReductionType as its second
     *                               argument.
     */
    template <typename EdgeMapType, typename FinalizerType>
    bool add_edge_aggregator(const std::string& key,
                             EdgeMapType map_function,
                             FinalizerType finalize_function) {
      aggregator_type* aggregator = get_aggregator();
      if(aggregator == NULL) {
        logstream(LOG_FATAL)
            << "Aggregation not supported by this engine!"
            << std::endl;
        return false; // does not return
      }
      return aggregator->add_edge_aggregator(key,
                                             map_function,
                                             finalize_function);
    } // end of add edge aggregator
#endif

    /**
     * \brief Performs an immediate aggregation on a key
     *
     * Performs an immediate aggregation on a key. All machines must
     * call this simultaneously. If the key is not found,
     * false is returned. Otherwise returns true on success.
     *
     * For instance, the following code will run the aggregator
     * with the name "absolute_vertex_sum" immediately.
     * \code
     * engine.aggregate_now("absolute_vertex_sum");
     * \endcode
     *
     * \param[in] key Key to aggregate now. Must be a key
     *                previously created by add_vertex_aggregator()
     *                or add_edge_aggregator().
     * \return False if key not found, True on success.
     */
    bool aggregate_now(const std::string& key) {
      aggregator_type* aggregator = get_aggregator();
      if(aggregator == NULL) {
        logstream(LOG_FATAL)
            << "Aggregation not supported by this engine!"
            << std::endl;
        return false; // does not return
      }
      return aggregator->aggregate_now(key);
    } // end of aggregate_now

    /**
     * \brief Performs a map-reduce operation on each vertex in the
     *        graph returning the result.
     *
     * Given a map function, map_reduce_vertices() calls the map function on all
     * vertices in the graph. The return values are then summed together and the
     * final result returned. The map function should only read the vertex data
     * and should not make any modifications. map_reduce_vertices() must be
     * called on all machines simultaneously.
     *
     * ### Basic Usage
     * For instance, if the graph has float vertex data, and float edge data:
     * \code
     * typedef graphlab::distributed_graph<float, float> graph_type;
     * \endcode
     *
     * To compute an absolute sum over all the vertex data, we would write
     * a function which reads in each vertex, and returns the absolute
     * value of the data on the vertex.
 * \code
 * float absolute_vertex_data(engine_type::icontext_type& context,
 *                            graph_type::vertex_type vertex) {
 *   return std::fabs(vertex.data());
 * }
 * \endcode
 * After which calling:
 * \code
 * float sum = engine.map_reduce_vertices<float>(absolute_vertex_data);
 * \endcode
 * will call the <code>absolute_vertex_data()</code> function
 * on each vertex in the graph. <code>absolute_vertex_data()</code>
 * reads the value of the vertex and returns the absolute result.
 * These return values are then summed together and returned.
 * All machines see the same result.
 *
 * The template argument <code><float></code> is needed to inform
 * the compiler regarding the return type of the mapfunction.
 *
 * ### Signalling
 * Another common use for the map_reduce_vertices() function is
 * in signalling. Since the map function is passed a context, it
 * can be used to perform signalling of vertices for execution
 * during a later \ref start() "engine.start()" call.
 *
 * For instance, the following code will signal all vertices
 * with value >= 1
 * \code
 * graphlab::empty signal_vertices(engine_type::icontext_type& context,
 *                                 graph_type::vertex_type vertex) {
 *   if (vertex.data() >= 1) context.signal(vertex);
 *   return graphlab::empty();
 * }
 * \endcode
 * Note that in this case, we are not interested in a reduction
 * operation, and thus we return a graphlab::empty object.
 * Calling:
 * \code
 * engine.map_reduce_vertices<graphlab::empty>(signal_vertices);
 * \endcode
 * will run <code>signal_vertices()</code> on all vertices,
 * signalling all vertices with value >= 1
 *
 * ### Relations
 * The map function has the same structure as that in
 * add_vertex_aggregator() and may be reused in an aggregator.
 * This function is also very similar to
 * graphlab::distributed_graph::map_reduce_vertices()
 * with the difference that this takes a context and thus
 * can be used to perform signalling.
 * Finally transform_vertices() can be used to perform a similar operation
 * but may also make modifications to graph data.
     *
     * \tparam ReductionType The output of the map function. Must have
     *                       operator+= defined, and must be \ref sec_serializable.
     * \tparam VertexMapperType The type of the map function.
     *                          Not generally needed.
     *                          Can be inferred by the compiler.
     * \param mapfunction The map function to use. Must take an
     *                    \ref icontext_type& as its first argument, and
     *                    a \ref vertex_type, or a reference to a
     *                    \ref vertex_type as its second argument.
     *                    Returns a ReductionType which must be summable
     *                    and \ref sec_serializable .
     */
    template <typename ReductionType, typename VertexMapperType>
    ReductionType map_reduce_vertices(VertexMapperType mapfunction) {
      aggregator_type* aggregator = get_aggregator();
      BOOST_CONCEPT_ASSERT((graphlab::Serializable<ReductionType>));
      BOOST_CONCEPT_ASSERT((graphlab::OpPlusEq<ReductionType>));
      if(aggregator == NULL) {
        logstream(LOG_FATAL)
            << "Aggregation not supported by this engine!"
            << std::endl;
        return ReductionType(); // does not return
      }
      return aggregator->template map_reduce_vertices<ReductionType>(mapfunction);
    }

    /**
     * \brief Performs a map-reduce operation on each edge in the
     *        graph returning the result.
     *
     * Given a map function, map_reduce_edges() calls the map function on all
     * edges in the graph. The return values are then summed together and the
     * final result returned. The map function should only read data
     * and should not make any modifications. map_reduce_edges() must be
     * called on all machines simultaneously.
     *
     * ### Basic Usage
     * For instance, if the graph has float vertex data, and float edge data:
     * \code
     * typedef graphlab::distributed_graph<float, float> graph_type;
     * \endcode
     *
     * To compute an absolute sum over all the edge data, we would write
     * a function which reads in each edge, and returns the absolute
     * value of the data on the edge.
 * \code
 * float absolute_edge_data(engine_type::icontext_type& context,
 *                          graph_type::edge_type edge) {
 *   return std::fabs(edge.data());
 * }
 * \endcode
 * After which calling:
 * \code
 * float sum = engine.map_reduce_edges<float>(absolute_edge_data);
 * \endcode
 * will call the <code>absolute_edge_data()</code> function
 * on each edge in the graph. <code>absolute_edge_data()</code>
 * reads the value of the edge and returns the absolute result.
 * These return values are then summed together and returned.
 * All machines see the same result.
 *
 * The template argument <code><float></code> is needed to inform
 * the compiler regarding the return type of the mapfunction.
 *
 * ### Signalling
 * Another common use for the map_reduce_edges() function is
 * in signalling. Since the map function is passed a context, it
 * can be used to perform signalling of edges for execution
 * during a later \ref start() "engine.start()" call.
 *
 * For instance, the following code will signal the source
 * vertex of each edge.
 * \code
 * graphlab::empty signal_source(engine_type::icontext_type& context,
 *                               graph_type::edge_type edge) {
 *   context.signal(edge.source());
 *   return graphlab::empty();
 * }
 * \endcode
 * Note that in this case, we are not interested in a reduction
 * operation, and thus we return a graphlab::empty object.
 * Calling:
 * \code
 * engine.map_reduce_edges<graphlab::empty>(signal_source);
 * \endcode
 * will run <code>signal_source()</code> on all edges,
 * signalling all source vertices.
 *
 * ### Relations
 * The map function has the same structure as that in
 * add_edge_aggregator() and may be reused in an aggregator.
 * This function is also very similar to
 * graphlab::distributed_graph::map_reduce_edges()
 * with the difference that this takes a context and thus
 * can be used to perform signalling.
 * Finally transform_edges() can be used to perform a similar operation
 * but may also make modifications to graph data.
 *
 * \tparam ReductionType The output of the map function.
     * Must have
     * operator+= defined, and must be \ref sec_serializable.
     * \tparam EdgeMapperType The type of the map function.
     *                        Not generally needed.
     *                        Can be inferred by the compiler.
     * \param mapfunction The map function to use. Must take an
     *                    \ref icontext_type& as its first argument, and
     *                    a \ref edge_type, or a reference to a
     *                    \ref edge_type as its second argument.
     *                    Returns a ReductionType which must be summable
     *                    and \ref sec_serializable .
     */
    template <typename ReductionType, typename EdgeMapperType>
    ReductionType map_reduce_edges(EdgeMapperType mapfunction) {
      aggregator_type* aggregator = get_aggregator();
      BOOST_CONCEPT_ASSERT((graphlab::Serializable<ReductionType>));
      BOOST_CONCEPT_ASSERT((graphlab::OpPlusEq<ReductionType>));
      if(aggregator == NULL) {
        logstream(LOG_FATAL)
            << "Aggregation not supported by this engine!"
            << std::endl;
        return ReductionType(); // does not return
      }
      return aggregator->template map_reduce_edges<ReductionType>(mapfunction);
    }

    /**
     * \brief Performs a transformation operation on each vertex in the graph.
     *
     * Given a mapfunction, transform_vertices() calls mapfunction on
     * every vertex in graph. The map function may make modifications
     * to the data on the vertex. transform_vertices() must be called by all
     * machines simultaneously.
     *
     * ### Basic Usage
     * For instance, if the graph has integer vertex data, and integer edge
     * data:
     * \code
     * typedef graphlab::distributed_graph<size_t, size_t> graph_type;
     * \endcode
     *
     * To set each vertex value to be the number of out-going edges,
     * we may write the following function:
     * \code
     * void set_vertex_value(engine_type::icontext_type& context,
     *                       graph_type::vertex_type vertex) {
     *   vertex.data() = vertex.num_out_edges();
     * }
     * \endcode
     *
     * Calling transform_vertices():
     * \code
     * engine.transform_vertices(set_vertex_value);
     * \endcode
     * will run the <code>set_vertex_value()</code> function
     * on each vertex in the graph, setting its new value.
     *
     * ### Signalling
     * Since the mapfunction is provided with a context, the mapfunction
     * can also be used to perform signalling. For instance, the
     * <code>set_vertex_value</code> function above may be modified to set
     * the value of the vertex, but to also signal the vertex if
     * it has more than 5 outgoing edges.
     *
     * \code
     * void set_vertex_value(engine_type::icontext_type& context,
     *                       graph_type::vertex_type vertex) {
     *   vertex.data() = vertex.num_out_edges();
     *   if (vertex.num_out_edges() > 5) context.signal(vertex);
     * }
     * \endcode
     *
     * However, if the purpose of the function is to only signal
     * without making modifications, map_reduce_vertices() will be
     * more efficient as this function will additionally perform
     * distributed synchronization of modified data.
     *
     * ### Relations
     * map_reduce_vertices() provides similar signalling functionality,
     * but should not make modifications to graph data.
     * graphlab::distributed_graph::transform_vertices() provides
     * the same graph modification capabilities, but without a context
     * and thus cannot perform signalling.
     *
     * \tparam VertexMapperType The type of the map function.
     *                          Not generally needed.
     *                          Can be inferred by the compiler.
     * \param mapfunction The map function to use. Must take an
     *                    \ref icontext_type& as its first argument, and
     *                    a \ref vertex_type, or a reference to a
     *                    \ref vertex_type as its second argument.
     *                    Returns void.
     */
    template <typename VertexMapperType>
    void transform_vertices(VertexMapperType mapfunction) {
      aggregator_type* aggregator = get_aggregator();
      if(aggregator == NULL) {
        logstream(LOG_FATAL)
            << "Aggregation not supported by this engine!"
            << std::endl;
        return; // does not return
      }
      aggregator->transform_vertices(mapfunction);
    }

    /**
     * \brief Performs a transformation operation on each edge in the graph.
     *
     * Given a mapfunction, transform_edges() calls mapfunction on
     * every edge in graph. The map function may make modifications
     * to the data on the edge.
     * transform_edges() must be called on
     * all machines simultaneously.
     *
     * ### Basic Usage
     * For instance, if the graph has integer vertex data, and integer edge
     * data:
     * \code
     * typedef graphlab::distributed_graph<size_t, size_t> graph_type;
     * \endcode
     *
     * To set each edge value to be the number of out-going edges
     * of the target vertex, we may write the following:
     * \code
     * void set_edge_value(engine_type::icontext_type& context,
     *                     graph_type::edge_type edge) {
     *   edge.data() = edge.target().num_out_edges();
     * }
     * \endcode
     *
     * Calling transform_edges():
     * \code
     * engine.transform_edges(set_edge_value);
     * \endcode
     * will run the <code>set_edge_value()</code> function
     * on each edge in the graph, setting its new value.
     *
     * ### Signalling
     * Since the mapfunction is provided with a context, the mapfunction
     * can also be used to perform signalling. For instance, the
     * <code>set_edge_value</code> function above may be modified to set
     * the value of the edge, but to also signal the target vertex.
     *
     * \code
     * void set_edge_value(engine_type::icontext_type& context,
     *                     graph_type::edge_type edge) {
     *   edge.data() = edge.target().num_out_edges();
     *   context.signal(edge.target());
     * }
     * \endcode
     *
     * However, if the purpose of the function is to only signal
     * without making modifications, map_reduce_edges() will be
     * more efficient as this function will additionally perform
     * distributed synchronization of modified data.
     *
     * ### Relations
     * map_reduce_edges() provides similar signalling functionality,
     * but should not make modifications to graph data.
     * graphlab::distributed_graph::transform_edges() provides
     * the same graph modification capabilities, but without a context
     * and thus cannot perform signalling.
     *
     * \tparam EdgeMapperType The type of the map function.
     *                        Not generally needed.
     *                        Can be inferred by the compiler.
     * \param mapfunction The map function to use.
     *                    Must take an
     *                    \ref icontext_type& as its first argument, and
     *                    a \ref edge_type, or a reference to a
     *                    \ref edge_type as its second argument.
     *                    Returns void.
     */
    template <typename EdgeMapperType>
    void transform_edges(EdgeMapperType mapfunction) {
      aggregator_type* aggregator = get_aggregator();
      if(aggregator == NULL) {
        logstream(LOG_FATAL)
            << "Aggregation not supported by this engine!"
            << std::endl;
        return; // does not return
      }
      aggregator->transform_edges(mapfunction);
    }

    /**
     * \brief Requests that a particular aggregation key
     *        be recomputed periodically when the engine is running.
     *
     * Requests that the aggregator with a given key be aggregated
     * every certain number of seconds when the engine is running.
     * Note that the period is prescriptive: in practice the actual
     * period will be larger than the requested period.
     * Seconds must be >= 0;
     *
     * For instance, the following code will schedule the aggregator
     * with the name "absolute_vertex_sum" to run every 1.5 seconds.
     * \code
     * engine.aggregate_periodic("absolute_vertex_sum", 1.5);
     * \endcode
     *
     * \param [in] key Key to schedule. Must be a key
     *                 previously created by add_vertex_aggregator()
     *                 or add_edge_aggregator().
     * \param [in] seconds How frequently to schedule. Must be >=
     *                     0. seconds == 0 will ensure that this key is
     *                     continuously recomputed.
     *
     * All machines must call simultaneously.
     * \return Returns true if key is found and seconds >= 0,
     *         and false otherwise.
     */
    bool aggregate_periodic(const std::string& key, float seconds) {
      aggregator_type* aggregator = get_aggregator();
      if(aggregator == NULL) {
        logstream(LOG_FATAL)
            << "Aggregation not supported by this engine!"
            << std::endl;
        return false; // does not return
      }
      return aggregator->aggregate_periodic(key, seconds);
    } // end of aggregate_periodic

    /**
     * \cond GRAPHLAB_INTERNAL
     * \internal
     * \brief This is used by iengine to get the
     * \ref distributed_aggregator from the derived class to support
     * the local templated aggregator interface.
* * \return a pointer to the distributed aggregator for that * engine. If no aggregator is available or aggregation is not * supported then return NULL. */ virtual aggregator_type* get_aggregator() = 0; /// \endcond }; // end of iengine interface } // end of namespace graphlab #endif ================================================ FILE: src/graphlab/engine/message_array.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_MESSAGE_ARRAY_HPP #define GRAPHLAB_MESSAGE_ARRAY_HPP #include <vector> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/parallel/atomic.hpp> #include <graphlab/scheduler/get_message_priority.hpp> namespace graphlab { /** * \TODO DOCUMENT THIS CLASS */ template<typename ValueType> class message_array { public: typedef ValueType value_type; private: struct message_box { value_type value; bool empty; message_box() : empty(true) { } /** returns false if element is already present */ inline bool add(const value_type& other, double& priority) { if (empty) { value = other; empty = false; priority = scheduler_impl::get_message_priority(value); return true; } else { value += other; priority = scheduler_impl::get_message_priority(value); return false; } } void clear() { value = value_type(); empty = true; } }; std::vector<message_box> message_vector; // lock array simple_spinlock lock_array[65536]; size_t joincounter[65536]; size_t addcounter[65536]; /** Not assignable */ void operator=(const message_array& other) { } static size_t get_lock_idx(size_t i) { return i % 65536; } public: /** Initialize the per vertex task set */ message_array(size_t num_vertices = 0) : message_vector(num_vertices) { for (size_t i = 0; i < 65536; ++i) { joincounter[i] = 0; addcounter[i] = 0; } } /** * Resizes the number of elements this message vector can hold */ void resize(size_t num_vertices) { message_vector.resize(num_vertices); } /** Add a message to the set returning false if a message is already present. 
*/ bool add(const size_t idx, const value_type& val, double* message_priority = NULL) { double priority; size_t lockidx = get_lock_idx(idx); lock_array[lockidx].lock(); bool ret = message_vector[idx].add(val, priority); joincounter[lockidx] += !ret; addcounter[lockidx]++; lock_array[lockidx].unlock(); if (message_priority) (*message_priority) = priority; return ret; } /** Returns the current message stored at idx and * clears the message. * Returns true on success and false if there is no message * stored at the index. */ bool get(const size_t idx, value_type& ret_val) { bool has_val = false; size_t lockidx = get_lock_idx(idx); lock_array[lockidx].lock(); if (!message_vector[idx].empty) { ret_val = message_vector[idx].value; message_vector[idx].clear(); has_val = true; } lock_array[lockidx].unlock(); return has_val; } /** Returns the current message stored at idx. * Returns true on success and false if there is no message * stored at the index. * Does not change the contents of the message */ bool peek(const size_t idx, value_type& ret_val) { bool has_val = false; size_t lockidx = get_lock_idx(idx); lock_array[lockidx].lock(); if (!message_vector[idx].empty) { ret_val = message_vector[idx].value; has_val = true; } lock_array[lockidx].unlock(); return has_val; } /// clears the message at a particular idx void clear(const size_t idx) { size_t lockidx = get_lock_idx(idx); lock_array[lockidx].lock(); message_vector[idx].clear(); lock_array[lockidx].unlock(); } /// Returns true if the message at position idx is empty bool empty(const size_t idx) const { return message_vector[idx].empty; } bool empty() const { for (size_t i = 0;i < message_vector.size(); ++i) { if (!message_vector[i].empty) return false; } return true; } /// Returns the length of the message vector size_t size() const { return message_vector.size(); } size_t num_joins() const { size_t total_joins = 0; for (size_t i = 0; i < 65536; ++i) { total_joins += joincounter[i]; } return total_joins; } size_t 
num_adds() const { size_t total_adds = 0; for (size_t i = 0; i < 65536; ++i) { total_adds += addcounter[i]; } return total_adds; } /// not thread safe. Clears all contents void clear() { for (size_t i = 0; i < message_vector.size(); ++i) clear(i); } }; // end of vertex map }; // end of namespace graphlab #undef VALUE_PENDING #endif ================================================ FILE: src/graphlab/engine/omni_engine.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_OMNI_ENGINE_HPP #define GRAPHLAB_OMNI_ENGINE_HPP #include <graphlab/options/graphlab_options.hpp> #include <graphlab/engine/iengine.hpp> #include <graphlab/engine/synchronous_engine.hpp> #include <graphlab/engine/async_consistent_engine.hpp> namespace graphlab { /** * \ingroup engines * * \brief The omni engine encapsulates all the GraphLab engines * allowing the user to select which engine to use at runtime. * * The actual engine type is set as a string argument to the * constructor of the omni_engine. 
Forexample: * * \code * std::string exec_model = "synchronous"; * // do something to determine the exec_model (possibly command * // line processing) * // Create the engine * graphlab::omni_engine<pagerank_vprog> engine(dc, graph, opts, exec_model); * \endcode * * The specific engine type can be overriden by command line * arguments (engine_opts="type=<type>"): * * \code * graphlab::omni_engine<pagerank_vprog> engine(dc, graph, opts, "synchronous"); * \endcode * * then calling the progam with the command line options: * \verbatim %> mpiexec -n 16 ./pagerank --engine_opts="type=synchronous" \endverbatim * * The currently supproted types are: * * \li "synchronous" or "sync": uses the synchronous engine * (\ref synchronous_engine) * \li "asynchronous" or "async": uses the asynchronous engine * (\ref async_consistent_engine) * * \see graphlab::synchronous_engine * \see graphlab::async_consistent_engine * */ template<typename VertexProgram> class omni_engine : public iengine<VertexProgram> { public: /** \brief The type of the iengine */ typedef iengine<VertexProgram> iengine_type; /** * \brief The user defined vertex program type which should extend * ivertex_program. */ typedef VertexProgram vertex_program_type; /** * \brief The user defined message type which is defined in * ivertex_program::message_type. * */ typedef typename vertex_program_type::message_type message_type; /** * \brief The graph type which is defined in * ivertex_program::graph_type and will typically be * \ref distributed_graph. */ typedef typename vertex_program_type::graph_type graph_type; /** * \brief The user defined type returned by the gather function. * * The gather type is defined in the \ref graphlab::ivertex_program * interface and is the value returned by the * \ref graphlab::ivertex_program::gather function. The * gather type must have an <code>operator+=(const gather_type& * other)</code> function and must be \ref sec_serializable. 
*/ typedef typename VertexProgram::gather_type gather_type; /** * \brief The vertex identifier type defined in * \ref graphlab::vertex_id_type. */ typedef typename graph_type::vertex_id_type vertex_id_type; /** * \brief The type of the distributed aggregator used by each engine to * implement distributed aggregation. */ typedef typename iengine_type::aggregator_type aggregator_type; /** * \brief the type of synchronous engine */ typedef synchronous_engine<VertexProgram> synchronous_engine_type; /** * \brief the type of asynchronous engine */ typedef async_consistent_engine<VertexProgram> async_consistent_engine_type; private: /** * \brief A pointer to the actual engine in use. */ iengine_type* engine_ptr; /** * \brief omni engines are not default constructible */ omni_engine() { } /** * \brief omni engines are not copyable */ omni_engine(const omni_engine& other ) { } public: /** * \brief Construct an omni engine for a given graph with the * default_engine_type unless the engine options contain an * alternative type. * * \param [in] dc a distributed control object that is used to * connect this engine with it's counter parts on other machines. * \param [in,out] graph the graph object that this engine will * transform. * \param [in] options the command line options which are used to * configure the engine. Note that the engine option "type" can * be used to select the engine to use (synchronous or * asynchronous). * \param [in] default_engine_type The user must specify what * engine type to use if no command line option is given. 
*/ omni_engine(distributed_control& dc, graph_type& graph, const std::string& default_engine_type, const graphlab_options& options = graphlab_options()) : engine_ptr(NULL) { graphlab_options new_options = options; std::string engine_type = default_engine_type; options_map& engine_options = new_options.get_engine_args(); if(engine_options.get_option("type", engine_type)) { // the engine option was set so use it instead // clear from the options map engine_options.options.erase("type"); } // Process the engine types if(engine_type == "sync" || engine_type == "synchronous") { logstream(LOG_INFO) << "Using the Synchronous engine." << std::endl; engine_ptr = new synchronous_engine_type(dc, graph, new_options); } else if(engine_type == "async" || engine_type == "asynchronous") { logstream(LOG_INFO) << "Using the Asynchronous engine." << std::endl; engine_ptr = new async_consistent_engine_type(dc, graph, new_options); } else { logstream(LOG_FATAL) << "Invalid engine type: " << engine_type << std::endl; } } // end of constructor /** * \brief Destroy the internal engine destroying all vertex * programs associated with this engine. 
*/ ~omni_engine() { if(engine_ptr != NULL) { delete engine_ptr; engine_ptr = NULL; } } // end of destructor execution_status::status_enum start( ) { return engine_ptr->start(); } size_t num_updates() const { return engine_ptr->num_updates(); } float elapsed_seconds() const { return engine_ptr->elapsed_seconds(); } int iteration() const { return engine_ptr->iteration(); } void signal(vertex_id_type vertex, const message_type& message = message_type()) { engine_ptr->signal(vertex, message); } void signal_all(const message_type& message = message_type(), const std::string& order = "shuffle") { engine_ptr->signal_all(message, order); } void signal_vset(const vertex_set& vset, const message_type& message = message_type(), const std::string& order = "shuffle") { engine_ptr->signal_vset(vset, message, order); } aggregator_type* get_aggregator() { return engine_ptr->get_aggregator(); } }; // end of omni_engine }; // end of namespace graphlab #endif ================================================ FILE: src/graphlab/engine/synchronous_engine.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SYNCHRONOUS_ENGINE_HPP #define GRAPHLAB_SYNCHRONOUS_ENGINE_HPP #include <deque> #include <boost/bind.hpp> #include <graphlab/engine/iengine.hpp> #include <graphlab/vertex_program/ivertex_program.hpp> #include <graphlab/vertex_program/icontext.hpp> #include <graphlab/vertex_program/context.hpp> #include <graphlab/engine/execution_status.hpp> #include <graphlab/options/graphlab_options.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/parallel/fiber_barrier.hpp> #include <graphlab/util/tracepoint.hpp> #include <graphlab/util/memory_info.hpp> #include <graphlab/rpc/dc_dist_object.hpp> #include <graphlab/rpc/distributed_event_log.hpp> #include <graphlab/rpc/fiber_buffered_exchange.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { /** * \ingroup engines * * \brief The synchronous engine executes all active vertex program * synchronously in a sequence of super-step (iterations) in both the * shared and distributed memory settings. * * \tparam VertexProgram The user defined vertex program which * should implement the \ref graphlab::ivertex_program interface. * * * ### Execution Semantics * * On start() the \ref graphlab::ivertex_program::init function is invoked * on all vertex programs in parallel to initialize the vertex program, * vertex data, and possibly signal vertices. * The engine then proceeds to execute a sequence of * super-steps (iterations) each of which is further decomposed into a * sequence of minor-steps which are also executed synchronously: * \li Receive all incoming messages (signals) by invoking the * \ref graphlab::ivertex_program::init function on all * vertex-programs that have incoming messages. If a * vertex-program does not have any incoming messages then it is * not active during this super-step. 
* \li Execute all gathers for active vertex programs by invoking * the user defined \ref graphlab::ivertex_program::gather function * on the edge direction returned by the * \ref graphlab::ivertex_program::gather_edges function. The gather * functions can modify edge data but cannot modify the vertex * program or vertex data and therefore can be executed on multiple * edges in parallel. The gather type is used to accumulate (sum) * the result of the gather function calls. * \li Execute all apply functions for active vertex-programs by * invoking the user defined \ref graphlab::ivertex_program::apply * function passing the sum of the gather functions. If \ref * graphlab::ivertex_program::gather_edges returns no edges then * the default gather value is passed to apply. The apply function * can modify the vertex program and vertex data. * \li Execute all scatters for active vertex programs by invoking * the user defined \ref graphlab::ivertex_program::scatter function * on the edge direction returned by the * \ref graphlab::ivertex_program::scatter_edges function. The scatter * functions can modify edge data but cannot modify the vertex * program or vertex data and therefore can be executed on multiple * edges in parallel. * * ### Construction * * The synchronous engine is constructed by passing in a * \ref graphlab::distributed_control object which manages coordination * between engine threads and a \ref graphlab::distributed_graph object * which is the graph on which the engine should be run. The graph should * already be populated and cannot change after the engine is constructed. * In the distributed setting all program instances (running on each machine) * should construct an instance of the engine at the same time. * * Computation is initiated by signaling vertices using either * \ref graphlab::synchronous_engine::signal or * \ref graphlab::synchronous_engine::signal_all. In either case all * machines should invoke signal or signal all at the same time. 
Finally, * computation is initiated by calling the * \ref graphlab::synchronous_engine::start function. * * ### Example Usage * * The following is a simple example demonstrating how to use the engine: * \code * #include <graphlab.hpp> * * struct vertex_data { * // code * }; * struct edge_data { * // code * }; * typedef graphlab::distributed_graph<vertex_data, edge_data> graph_type; * typedef float gather_type; * struct pagerank_vprog : * public graphlab::ivertex_program<graph_type, gather_type> { * // code * }; * * int main(int argc, char** argv) { * // Initialize control plain using mpi * graphlab::mpi_tools::init(argc, argv); * graphlab::distributed_control dc; * // Parse command line options * graphlab::command_line_options clopts("PageRank algorithm."); * std::string graph_dir; * clopts.attach_option("graph", &graph_dir, graph_dir, * "The graph file."); * if(!clopts.parse(argc, argv)) { * std::cout << "Error in parsing arguments." << std::endl; * return EXIT_FAILURE; * } * graph_type graph(dc, clopts); * graph.load_structure(graph_dir, "tsv"); * graph.finalize(); * std::cout << "#vertices: " << graph.num_vertices() * << " #edges:" << graph.num_edges() << std::endl; * graphlab::synchronous_engine<pagerank_vprog> engine(dc, graph, clopts); * engine.signal_all(); * engine.start(); * std::cout << "Runtime: " << engine.elapsed_seconds(); * graphlab::mpi_tools::finalize(); * } * \endcode * * * * <a name=engineopts>Engine Options</a> * ===================== * The synchronous engine supports several engine options which can * be set as command line arguments using \c --engine_opts : * * \li <b>max_iterations</b>: (default: infinity) The maximum number * of iterations (super-steps) to run. * * \li <b>timeout</b>: (default: infinity) The maximum time in * seconds that the engine may run. When the time runs out the * current iteration is completed and then the engine terminates. * * \li <b>use_cache</b>: (default: false) This is used to enable * caching. 
When caching is enabled the gather phase is skipped for * vertices that already have a cached value. To use caching the * vertex program must either clear (\ref icontext::clear_gather_cache) * or update (\ref icontext::post_delta) the cache values of * neighboring vertices during the scatter phase. * * \li \b snapshot_interval If set to a positive value, a snapshot * is taken every this number of iterations. If set to 0, a snapshot * is taken before the first iteration. If set to a negative value, * no snapshots are taken. Defaults to -1. A snapshot is a binary * dump of the graph. * * \li \b snapshot_path If snapshot_interval is set to a value >=0, * this option must be specified and should contain a target basename * for the snapshot. The path including folder and file prefix in * which the snapshots should be saved. * * \see graphlab::omni_engine * \see graphlab::async_consistent_engine * \see graphlab::semi_synchronous_engine */ template<typename VertexProgram> class synchronous_engine : public iengine<VertexProgram> { public: /** * \brief The user defined vertex program type. Equivalent to the * VertexProgram template argument. * * The user defined vertex program type which should implement the * \ref graphlab::ivertex_program interface. */ typedef VertexProgram vertex_program_type; /** * \brief The user defined type returned by the gather function. * * The gather type is defined in the \ref graphlab::ivertex_program * interface and is the value returned by the * \ref graphlab::ivertex_program::gather function. The * gather type must have an <code>operator+=(const gather_type& * other)</code> function and must be \ref sec_serializable. */ typedef typename VertexProgram::gather_type gather_type; /** * \brief The user defined message type used to signal neighboring * vertex programs. * * The message type is defined in the \ref graphlab::ivertex_program * interface and used in the call to \ref graphlab::icontext::signal. 
* The message type must have an * <code>operator+=(const gather_type& other)</code> function and * must be \ref sec_serializable. */ typedef typename VertexProgram::message_type message_type; /** * \brief The type of data associated with each vertex in the graph * * The vertex data type must be \ref sec_serializable. */ typedef typename VertexProgram::vertex_data_type vertex_data_type; /** * \brief The type of data associated with each edge in the graph * * The edge data type must be \ref sec_serializable. */ typedef typename VertexProgram::edge_data_type edge_data_type; /** * \brief The type of graph supported by this vertex program * * See graphlab::distributed_graph */ typedef typename VertexProgram::graph_type graph_type; /** * \brief The type used to represent a vertex in the graph. * See \ref graphlab::distributed_graph::vertex_type for details * * The vertex type contains the function * \ref graphlab::distributed_graph::vertex_type::data which * returns a reference to the vertex data as well as other functions * like \ref graphlab::distributed_graph::vertex_type::num_in_edges * which returns the number of in edges. * */ typedef typename graph_type::vertex_type vertex_type; /** * \brief The type used to represent an edge in the graph. * See \ref graphlab::distributed_graph::edge_type for details. * * The edge type contains the function * \ref graphlab::distributed_graph::edge_type::data which returns a * reference to the edge data. In addition the edge type contains * the function \ref graphlab::distributed_graph::edge_type::source and * \ref graphlab::distributed_graph::edge_type::target. * */ typedef typename graph_type::edge_type edge_type; /** * \brief The type of the callback interface passed by the engine to vertex * programs. See \ref graphlab::icontext for details. * * The context callback is passed to the vertex program functions and is * used to signal other vertices, get the current iteration, and access * information about the engine. 
*/ typedef icontext<graph_type, gather_type, message_type> icontext_type; private: /** * \brief Local vertex type used by the engine for fast indexing */ typedef typename graph_type::local_vertex_type local_vertex_type; /** * \brief Local edge type used by the engine for fast indexing */ typedef typename graph_type::local_edge_type local_edge_type; /** * \brief Local vertex id type used by the engine for fast indexing */ typedef typename graph_type::lvid_type lvid_type; std::vector<double> per_thread_compute_time; /** * \brief The actual instance of the context type used by this engine. */ typedef context<synchronous_engine> context_type; friend class context<synchronous_engine>; /** * \brief The type of the distributed aggregator inherited from iengine */ typedef typename iengine<vertex_program_type>::aggregator_type aggregator_type; /** * \brief The object used to communicate with remote copies of the * synchronous engine. */ dc_dist_object< synchronous_engine<VertexProgram> > rmi; /** * \brief A reference to the distributed graph on which this * synchronous engine is running. */ graph_type& graph; /** * \brief The number of CPUs used. */ size_t ncpus; /** * \brief The local worker threads used by this engine */ fiber_group threads; /** * \brief A thread barrier that is used to control the threads in the * thread pool. */ fiber_barrier thread_barrier; /** * \brief The maximum number of super-steps (iterations) to run * before terminating. If the max iterations is reached the * engine will terminate if their are no messages remaining. */ size_t max_iterations; /* * \brief When caching is enabled the gather phase is skipped for * vertices that already have a cached value. To use caching the * vertex program must either clear (\ref icontext::clear_gather_cache) * or update (\ref icontext::post_delta) the cache values of * neighboring vertices during the scatter phase. */ bool use_cache; /** * \brief A snapshot is taken every this number of iterations. 
* If snapshot_interval == 0, a snapshot is only taken before the first * iteration. If snapshot_interval < 0, no snapshots are taken. */ int snapshot_interval; /// \brief The target base name the snapshot is saved in. std::string snapshot_path; /** * \brief A counter that tracks the current iteration number since * start was last invoked. */ size_t iteration_counter; /** * \brief The time in seconds at which the engine started. */ float start_time; /** * \brief The timeout time in seconds */ float timeout; /** * \brief Schedules all vertices every iteration */ bool sched_allv; /** * \brief Used to stop the engine prematurely */ bool force_abort; /** * \brief The vertex locks protect access to vertex specific * data-structures including * \ref graphlab::synchronous_engine::gather_accum * and \ref graphlab::synchronous_engine::messages. */ std::vector<simple_spinlock> vlocks; /** * \brief The elocks protect individual edges during gather and * scatter. Technically there is a potential race since gather * and scatter can modify edge values and can overlap. The edge * lock ensures that only one gather or scatter occurs on an edge * at a time. */ std::vector<simple_spinlock> elocks; /** * \brief The vertex programs associated with each vertex on this * machine. */ std::vector<vertex_program_type> vertex_programs; /** * \brief Vector of messages associated with each vertex. */ std::vector<message_type> messages; /** * \brief Bit indicating whether a message is present for each vertex. */ dense_bitset has_message; /** * \brief Gather accumulator used for each master vertex to merge * the result of all the machine specific accumulators (or * caches). * * The gather accumulator can be accessed by multiple threads at * once and therefore must be guarded by a vertex locks in * \ref graphlab::synchronous_engine::vlocks */ std::vector<gather_type> gather_accum; /** * \brief Bit indicating if the gather has accumulator contains any * values. 
* * While dense bitsets are thread safe the value of this bit must * change concurrently with the * \ref graphlab::synchronous_engine::gather_accum and therefore is * set while holding the lock in * \ref graphlab::synchronous_engine::vlocks. */ dense_bitset has_gather_accum; /** * \brief This optional vector contains caches of previous gather * contributions for each machine. * * Caching is done locally and therefore a high-degree vertex may * have multiple caches (one per machine). */ std::vector<gather_type> gather_cache; /** * \brief A bit indicating if the local gather for that vertex is * available. */ dense_bitset has_cache; /** * \brief A bit (for master vertices) indicating if that vertex is active * (received a message on this iteration). */ dense_bitset active_superstep; /** * \brief The number of local vertices (masters) that are active on this * iteration. */ atomic<size_t> num_active_vertices; /** * \brief A bit indicating (for all vertices) whether to * participate in the current minor-step (gather or scatter). */ dense_bitset active_minorstep; /** * \brief A counter measuring the number of applys that have been completed */ atomic<size_t> completed_applys; /** * \brief The shared counter used coordinate operations between * threads. */ atomic<size_t> shared_lvid_counter; /** * \brief The pair type used to synchronize vertex programs across machines. */ typedef std::pair<vertex_id_type, vertex_program_type> vid_prog_pair_type; /** * \brief The type of the exchange used to synchronize vertex programs */ typedef fiber_buffered_exchange<vid_prog_pair_type> vprog_exchange_type; /** * \brief The distributed exchange used to synchronize changes to * vertex programs. */ vprog_exchange_type vprog_exchange; /** * \brief The pair type used to synchronize vertex across across machines. 
*/ typedef std::pair<vertex_id_type, vertex_data_type> vid_vdata_pair_type; /** * \brief The type of the exchange used to synchronize vertex data */ typedef fiber_buffered_exchange<vid_vdata_pair_type> vdata_exchange_type; /** * \brief The distributed exchange used to synchronize changes to * vertex programs. */ vdata_exchange_type vdata_exchange; /** * \brief The pair type used to synchronize the results of the gather phase */ typedef std::pair<vertex_id_type, gather_type> vid_gather_pair_type; /** * \brief The type of the exchange used to synchronize gather * accumulators */ typedef fiber_buffered_exchange<vid_gather_pair_type> gather_exchange_type; /** * \brief The distributed exchange used to synchronize gather * accumulators. */ gather_exchange_type gather_exchange; /** * \brief The pair type used to synchronize messages */ typedef std::pair<vertex_id_type, message_type> vid_message_pair_type; /** * \brief The type of the exchange used to synchronize messages */ typedef fiber_buffered_exchange<vid_message_pair_type> message_exchange_type; /** * \brief The distributed exchange used to synchronize messages */ message_exchange_type message_exchange; /** * \brief The distributed aggregator used to manage background * aggregation. */ aggregator_type aggregator; DECLARE_EVENT(EVENT_APPLIES); DECLARE_EVENT(EVENT_GATHERS); DECLARE_EVENT(EVENT_SCATTERS); DECLARE_EVENT(EVENT_ACTIVE_CPUS); public: /** * \brief Construct a synchronous engine for a given graph and options. * * The synchronous engine should be constructed after the graph * has been loaded (e.g., \ref graphlab::distributed_graph::load) * and the graphlab options have been set * (e.g., \ref graphlab::command_line_options). * * In the distributed engine the synchronous engine must be called * on all machines at the same time (in the same order) passing * the \ref graphlab::distributed_control object. 
Upon * construction the synchronous engine allocates several * data-structures to store messages, gather accumulants, and * vertex programs and therefore may require considerable memory. * * The number of threads to create are read from * \ref graphlab_options::get_ncpus "opts.get_ncpus()". * * See the <a href="#engineopts">main class documentation</a> * for details on the available options. * * @param [in] dc Distributed controller to associate with * @param [in,out] graph A reference to the graph object that this * engine will modify. The graph must be fully constructed and * finalized. * @param [in] opts A graphlab::graphlab_options object specifying engine * parameters. This is typically constructed using * \ref graphlab::command_line_options. */ synchronous_engine(distributed_control& dc, graph_type& graph, const graphlab_options& opts = graphlab_options()); /** * \brief Start execution of the synchronous engine. * * The start function begins computation and does not return until * there are no remaining messages or until max_iterations has * been reached. * * The start() function modifies the data graph through the vertex * programs and so upon return the data graph should contain the * result of the computation. * * @return The reason for termination */ execution_status::status_enum start(); // documentation inherited from iengine size_t num_updates() const; // documentation inherited from iengine void signal(vertex_id_type vid, const message_type& message = message_type()); // documentation inherited from iengine void signal_all(const message_type& message = message_type(), const std::string& order = "shuffle"); void signal_vset(const vertex_set& vset, const message_type& message = message_type(), const std::string& order = "shuffle"); // documentation inherited from iengine float elapsed_seconds() const; /** * \brief Get the current iteration number since start was last * invoked. 
*
   * \return the current iteration
   */
  int iteration() const;

  /**
   * \brief Compute the total memory used by the entire distributed
   * system.
   *
   * @return The total memory used in bytes.
   */
  size_t total_memory_usage() const;

  /**
   * \brief Get a pointer to the distributed aggregator object.
   *
   * This is currently used by the \ref graphlab::iengine interface to
   * implement the calls to aggregation.
   *
   * @return a pointer to the local aggregator.
   */
  aggregator_type* get_aggregator();

  /**
   * \brief Initialize the engine: allocate the per-vertex data structures
   * (programs, locks, bitsets) and clear all messages.
   */
  void init();

private:

  /**
   * \brief Resize the data structures to fit the current graph size (in
   * case of a dynamic graph).  Unlike init(), this keeps all pending
   * messages and cached gathers.
   */
  void resize();

  /**
   * \brief This internal stop function is called by the \ref graphlab::context
   * to terminate execution of the engine.  It broadcasts the stop request
   * to every process.
   */
  void internal_stop();

  /**
   * \brief This function is called remotely via RPC to force this
   * engine instance to stop (sets the abort flag).
   */
  void rpc_stop();

  /**
   * \brief Signal a vertex.
   *
   * This function is called by the \ref graphlab::context.
   *
   * @param [in] vertex the vertex to signal
   * @param [in] message the message to send to that vertex.
   */
  void internal_signal(const vertex_type& vertex,
                       const message_type& message = message_type());

  /**
   * \brief Called by the context to signal an arbitrary vertex.
   *
   * @param [in] gvid the global vertex id of the vertex to signal
   * @param [in] message the message to send to that vertex.
   */
  void internal_signal_gvid(vertex_id_type gvid,
                            const message_type& message = message_type());

  /**
   * \brief This function tests if this machine is the master of
   * gvid and signals if successful.
   */
  void internal_signal_rpc(vertex_id_type gvid,
                           const message_type& message = message_type());

  /**
   * \brief Post a delta to a previously computed gather of a given vertex.
   *
   * This function is called by the \ref graphlab::context.
   *
   * @param [in] vertex The vertex to which to post a change in the sum
   * @param [in] delta The change in that sum
   */
  void internal_post_delta(const vertex_type& vertex,
                           const gather_type& delta);

  /**
   * \brief Clear the cached gather for a vertex if one is
   * available.
   *
   * This function is called by the \ref graphlab::context.
   *
   * @param [in] vertex the vertex for which to clear the cache
   */
  void internal_clear_gather_cache(const vertex_type& vertex);

  // Program Steps ==========================================================

  // Runs fn() while keeping the ACTIVE_CPUS instantaneous event counter
  // accurate for the duration of the call.
  void thread_launch_wrapped_event_counter(boost::function<void(void)> fn) {
    INCREMENT_EVENT(EVENT_ACTIVE_CPUS, 1);
    fn();
    DECREMENT_EVENT(EVENT_ACTIVE_CPUS, 1);
  }

  /**
   * \brief Executes ncpus copies of a member function each with a
   * unique consecutive id (thread id).
   *
   * This function is used by the main loop to execute each of the
   * stages in parallel.
   *
   * The member function must have the type:
   *
   * \code
   * void synchronous_engine::member_fun(size_t threadid);
   * \endcode
   *
   * This function runs an rmi barrier after termination
   *
   * @tparam the type of the member function.
   * @param [in] member_fun the function to call.
   */
  template<typename MemberFunction>
  void run_synchronous(MemberFunction member_fun) {
    shared_lvid_counter = 0;
    // With a single cpu the fibers never migrate, so account for the one
    // active cpu here instead of inside each fiber.
    if (ncpus <= 1) {
      INCREMENT_EVENT(EVENT_ACTIVE_CPUS, 1);
    }
    // launch the initialization threads
    for(size_t i = 0; i < ncpus; ++i) {
      fiber_control::affinity_type affinity;
      affinity.clear();
      affinity.set_bit(i);
      boost::function<void(void)> invoke = boost::bind(member_fun, this, i);
      threads.launch(boost::bind(
                       &synchronous_engine::thread_launch_wrapped_event_counter,
                       this, invoke), affinity);
    }
    // Wait for all threads to finish
    threads.join();
    rmi.barrier();
    if (ncpus <= 1) {
      DECREMENT_EVENT(EVENT_ACTIVE_CPUS, 1);
    }
  } // end of run_synchronous

  // /**
  //  * \brief Initialize all vertex programs by invoking
  //  * \ref graphlab::ivertex_program::init on all vertices.
  //  *
  //  * @param thread_id the thread to run this as which determines
  //  * which vertices to process.
  //  */
  // void initialize_vertex_programs(size_t thread_id);

  /**
   * \brief Synchronize all message data.
   *
   * @param thread_id the thread to run this as which determines
   * which vertices to process.
   */
  void exchange_messages(size_t thread_id);

  /**
   * \brief Invoke the \ref graphlab::ivertex_program::init function
   * on all vertex programs that have inbound messages.
   *
   * @param thread_id the thread to run this as which determines
   * which vertices to process.
   */
  void receive_messages(size_t thread_id);

  /**
   * \brief Execute the \ref graphlab::ivertex_program::gather function on all
   * vertices that received messages for the edges specified by the
   * \ref graphlab::ivertex_program::gather_edges.
   *
   * @param thread_id the thread to run this as which determines
   * which vertices to process.
   */
  void execute_gathers(size_t thread_id);

  /**
   * \brief Execute the \ref graphlab::ivertex_program::apply function on
   * all vertices that received messages in this super-step (active).
   *
   * @param thread_id the thread to run this as which determines
   * which vertices to process.
   */
  void execute_applys(size_t thread_id);

  /**
   * \brief Execute the \ref graphlab::ivertex_program::scatter function on all
   * vertices that received messages for the edges specified by the
   * \ref graphlab::ivertex_program::scatter_edges.
   *
   * @param thread_id the thread to run this as which determines
   * which vertices to process.
   */
  void execute_scatters(size_t thread_id);

  // Data Synchronization ===================================================
  /**
   * \brief Send the vertex program for the local vertex id to all
   * of its mirrors.
   *
   * @param [in] lvid the vertex to sync.  This machine must be the
   * master of that vertex.
   */
  void sync_vertex_program(lvid_type lvid, size_t thread_id);

  /**
   * \brief Receive all incoming vertex programs and update the
   * local mirrors.
   *
   * This function returns when there are no more incoming vertex
   * programs and should be called after a flush of the vertex
   * program exchange.
   */
  void recv_vertex_programs();

  /**
   * \brief Send the vertex data for the local vertex id to all of
   * its mirrors.
   *
   * @param [in] lvid the vertex to sync.  This machine must be the master
   * of that vertex.
   */
  void sync_vertex_data(lvid_type lvid, size_t thread_id);

  /**
   * \brief Receive all incoming vertex data and update the local
   * mirrors.
   *
   * This function returns when there are no more incoming vertex
   * data and should be called after a flush of the vertex data
   * exchange.
   */
  void recv_vertex_data();

  /**
   * \brief Send the gather value for the vertex id to its master.
   *
   * @param [in] lvid the vertex to send the gather value to
   * @param [in] accum the locally computed gather value.
   */
  void sync_gather(lvid_type lvid, const gather_type& accum,
                   size_t thread_id);

  /**
   * \brief Receive the gather values from the buffered exchange.
   *
   * This function returns when there is nothing left in the
   * buffered exchange and should be called after the buffered
   * exchange has been flushed
   */
  void recv_gathers();

  /**
   * \brief Send the accumulated message for the local vertex to its
   * master.
   *
   * @param [in] lvid the vertex to send
   */
  void sync_message(lvid_type lvid, const size_t thread_id);

  /**
   * \brief Receive the messages from the buffered exchange.
   *
   * This function returns when there is nothing left in the
   * buffered exchange and should be called after the buffered
   * exchange has been flushed
   */
  void recv_messages();

}; // end of class synchronous engine


/**
 * Constructs a synchronous distributed engine.
 * The number of threads to create are read from
 * opts::get_ncpus().
 *
 * Valid engine options (graphlab_options::get_engine_args()):
 * \arg \c max_iterations Sets the maximum number of iterations the
 * engine will run for.
 * \arg \c use_cache If set to true, partial gathers are cached.
* See \ref gather_caching to understand the behavior of the
 * gather caching model and how it may be used to accelerate program
 * performance.
 *
 * \param dc Distributed controller to associate with
 * \param graph The graph to schedule over. The graph must be fully
 * constructed and finalized.
 * \param opts A graphlab_options object containing options and parameters
 * for the engine.
 */
template<typename VertexProgram>
synchronous_engine<VertexProgram>::
synchronous_engine(distributed_control &dc,
                   graph_type& graph,
                   const graphlab_options& opts) :
  rmi(dc, this), graph(graph),
  ncpus(opts.get_ncpus()),
  threads(2*1024*1024 /* 2MB stack per fiber*/),
  thread_barrier(opts.get_ncpus()),
  max_iterations(-1), snapshot_interval(-1), iteration_counter(0),
  timeout(0), sched_allv(false),
  vprog_exchange(dc), vdata_exchange(dc),
  gather_exchange(dc), message_exchange(dc),
  aggregator(dc, graph, new context_type(*this, graph)) {
  // Process any additional options
  std::vector<std::string> keys = opts.get_engine_args().get_option_keys();
  per_thread_compute_time.resize(opts.get_ncpus());
  use_cache = false;
  foreach(std::string opt, keys) {
    if (opt == "max_iterations") {
      opts.get_engine_args().get_option("max_iterations", max_iterations);
      if (rmi.procid() == 0)
        logstream(LOG_EMPH) << "Engine Option: max_iterations = "
                            << max_iterations << std::endl;
    } else if (opt == "timeout") {
      opts.get_engine_args().get_option("timeout", timeout);
      if (rmi.procid() == 0)
        logstream(LOG_EMPH) << "Engine Option: timeout = "
                            << timeout << std::endl;
    } else if (opt == "use_cache") {
      opts.get_engine_args().get_option("use_cache", use_cache);
      if (rmi.procid() == 0)
        logstream(LOG_EMPH) << "Engine Option: use_cache = "
                            << use_cache << std::endl;
    } else if (opt == "snapshot_interval") {
      opts.get_engine_args().get_option("snapshot_interval", snapshot_interval);
      if (rmi.procid() == 0)
        logstream(LOG_EMPH) << "Engine Option: snapshot_interval = "
                            << snapshot_interval << std::endl;
    } else if (opt == "snapshot_path") {
      opts.get_engine_args().get_option("snapshot_path", snapshot_path);
      if (rmi.procid() == 0)
        logstream(LOG_EMPH) << "Engine Option: snapshot_path = "
                            << snapshot_path << std::endl;
    } else if (opt == "sched_allv") {
      opts.get_engine_args().get_option("sched_allv", sched_allv);
      if (rmi.procid() == 0)
        logstream(LOG_EMPH) << "Engine Option: sched_allv = "
                            << sched_allv << std::endl;
    } else {
      logstream(LOG_FATAL) << "Unexpected Engine Option: " << opt << std::endl;
    }
  }
  // A snapshot interval without a destination path is a configuration error.
  if (snapshot_interval >= 0 && snapshot_path.length() == 0) {
    logstream(LOG_FATAL)
      << "Snapshot interval specified, but no snapshot path" << std::endl;
  }
  INITIALIZE_EVENT_LOG(dc);
  ADD_CUMULATIVE_EVENT(EVENT_APPLIES, "Applies", "Calls");
  ADD_CUMULATIVE_EVENT(EVENT_GATHERS , "Gathers", "Calls");
  ADD_CUMULATIVE_EVENT(EVENT_SCATTERS , "Scatters", "Calls");
  ADD_INSTANTANEOUS_EVENT(EVENT_ACTIVE_CPUS, "Active Threads", "Threads");

  graph.finalize();
  init();
} // end of synchronous engine


// Allocate all per-vertex state and reset messages/caches/counters.
template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
init() {
  resize();
  // Clear up
  force_abort = false;
  iteration_counter = 0;
  completed_applys = 0;
  has_message.clear();
  has_gather_accum.clear();
  has_cache.clear();
  active_superstep.clear();
  active_minorstep.clear();
}


// Grow the per-vertex containers to match the graph; existing messages
// and cached gathers are preserved (used for dynamic graphs).
template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
resize() {
  memory_info::log_usage("Before Engine Initialization");
  // Allocate vertex locks and vertex programs
  vlocks.resize(graph.num_local_vertices());
  vertex_programs.resize(graph.num_local_vertices());
  // allocate the edge locks
  //elocks.resize(graph.num_local_edges());

  // Allocate messages and message bitset
  messages.resize(graph.num_local_vertices(), message_type());
  has_message.resize(graph.num_local_vertices());

  // Allocate gather accumulators and accumulator bitset
  gather_accum.resize(graph.num_local_vertices(), gather_type());
  has_gather_accum.resize(graph.num_local_vertices());

  // If caching is used then allocate cache data-structures
  if (use_cache) {
    gather_cache.resize(graph.num_local_vertices(), gather_type());
    has_cache.resize(graph.num_local_vertices());
  }

  // Allocate bitset to track active vertices on each bitset.
  active_superstep.resize(graph.num_local_vertices());
  active_minorstep.resize(graph.num_local_vertices());

  // Print memory usage after initialization
  memory_info::log_usage("After Engine Initialization");
}


template<typename VertexProgram>
typename synchronous_engine<VertexProgram>::aggregator_type*
synchronous_engine<VertexProgram>::get_aggregator() {
  return &aggregator;
} // end of get_aggregator


// Broadcast the stop request to every process (including this one).
template<typename VertexProgram>
void synchronous_engine<VertexProgram>::internal_stop() {
  for (size_t i = 0; i < rmi.numprocs(); ++i)
    rmi.remote_call(i, &synchronous_engine<VertexProgram>::rpc_stop);
} // end of internal_stop


template<typename VertexProgram>
void synchronous_engine<VertexProgram>::rpc_stop() {
  force_abort = true;
} // end of rpc_stop


// Collectively signal a single vertex by global id.  All processes must
// call this (note the barriers).
template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
signal(vertex_id_type gvid, const message_type& message) {
  if (vlocks.size() != graph.num_local_vertices()) resize();
  rmi.barrier();
  internal_signal_rpc(gvid, message);
  rmi.barrier();
} // end of signal


// Signal every master vertex on this process with the given message.
// The 'order' argument is currently unused.
template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
signal_all(const message_type& message, const std::string& order) {
  if (vlocks.size() != graph.num_local_vertices()) resize();
  for(lvid_type lvid = 0; lvid < graph.num_local_vertices(); ++lvid) {
    if(graph.l_is_master(lvid)) {
      internal_signal(vertex_type(graph.l_vertex(lvid)), message);
    }
  }
} // end of signal all


// Signal every master vertex contained in vset with the given message.
template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
signal_vset(const vertex_set& vset,
            const message_type& message, const std::string& order) {
  if (vlocks.size() != graph.num_local_vertices()) resize();
  for(lvid_type lvid = 0; lvid < graph.num_local_vertices(); ++lvid) {
    if(graph.l_is_master(lvid) && vset.l_contains(lvid)) {
      internal_signal(vertex_type(graph.l_vertex(lvid)), message);
    }
  }
} // end of signal_vset


// Merge (or install) a message for the local vertex under its lock.
template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
internal_signal(const vertex_type& vertex,
                const message_type& message) {
  const lvid_type lvid = vertex.local_id();
  vlocks[lvid].lock();
  if( has_message.get(lvid) ) {
    // A message is already pending: combine via operator+=.
    messages[lvid] += message;
  } else {
    messages[lvid] = message;
    has_message.set_bit(lvid);
  }
  vlocks[lvid].unlock();
} // end of internal_signal


// Route a signal to the process that masters gvid.
template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
internal_signal_gvid(vertex_id_type gvid, const message_type& message) {
  procid_t proc = graph.master(gvid);
  if(proc == rmi.procid()) internal_signal_rpc(gvid, message);
  else rmi.remote_call(proc,
                       &synchronous_engine<VertexProgram>::internal_signal_rpc,
                       gvid, message);
}


template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
internal_signal_rpc(vertex_id_type gvid,
                    const message_type& message) {
  if (graph.is_master(gvid)) {
    internal_signal(graph.vertex(gvid), message);
  }
} // end of internal_signal_rpc


// Fold a delta into the cached gather of a vertex (no-op when caching
// is disabled or no cache entry exists yet).
template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
internal_post_delta(const vertex_type& vertex, const gather_type& delta) {
  const bool caching_enabled = !gather_cache.empty();
  if(caching_enabled) {
    const lvid_type lvid = vertex.local_id();
    vlocks[lvid].lock();
    if( has_cache.get(lvid) ) {
      gather_cache[lvid] += delta;
    } else {
      // You cannot add a delta to an empty cache.  A complete
      // gather must have been run.
      // gather_cache[lvid] = delta;
      // has_cache.set_bit(lvid);
    }
    vlocks[lvid].unlock();
  }
} // end of post_delta


template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
internal_clear_gather_cache(const vertex_type& vertex) {
  const bool caching_enabled = !gather_cache.empty();
  const lvid_type lvid = vertex.local_id();
  if(caching_enabled && has_cache.get(lvid)) {
    vlocks[lvid].lock();
    gather_cache[lvid] = gather_type();
    has_cache.clear_bit(lvid);
    vlocks[lvid].unlock();
  }
} // end of clear_gather_cache


template<typename VertexProgram>
size_t synchronous_engine<VertexProgram>::
num_updates() const { return completed_applys.value; }

template<typename VertexProgram>
float synchronous_engine<VertexProgram>::
elapsed_seconds() const { return timer::approx_time_seconds() - start_time; }

template<typename VertexProgram>
int synchronous_engine<VertexProgram>::
iteration() const { return iteration_counter; }


// compute the total memory usage of the GraphLab system
template<typename VertexProgram>
size_t synchronous_engine<VertexProgram>::total_memory_usage() const {
  size_t allocated_memory = memory_info::allocated_bytes();
  rmi.all_reduce(allocated_memory);
  return allocated_memory;
}


template<typename VertexProgram>
execution_status::status_enum
synchronous_engine<VertexProgram>::start() {
  if (vlocks.size() != graph.num_local_vertices()) resize();
  completed_applys = 0;
  rmi.barrier();

  // Initialization code ==================================================
  // Reset event log counters?
// Start the timer
  graphlab::timer timer; timer.start();
  start_time = timer::approx_time_seconds();
  iteration_counter = 0;
  force_abort = false;
  execution_status::status_enum termination_reason = execution_status::UNSET;
  // if (perform_init_vtx_program) {
  //   // Initialize all vertex programs
  //   run_synchronous( &synchronous_engine::initialize_vertex_programs );
  // }
  aggregator.start();
  rmi.barrier();

  // A snapshot_interval of 0 means: snapshot once before the first iteration.
  if (snapshot_interval == 0) {
    graph.save_binary(snapshot_path);
  }

  float last_print = -5;
  if (rmi.procid() == 0) {
    logstream(LOG_EMPH) << "Iteration counter will only output every 5 seconds."
                        << std::endl;
  }

  // Program Main loop ====================================================
  while(iteration_counter < max_iterations && !force_abort ) {

    // Check first to see if we are out of time
    if(timeout != 0 && timeout < elapsed_seconds()) {
      termination_reason = execution_status::TIMEOUT;
      break;
    }

    // Throttle iteration logging to at most once every 5 seconds.
    bool print_this_round = (elapsed_seconds() - last_print) >= 5;
    if(rmi.procid() == 0 && print_this_round) {
      logstream(LOG_EMPH)
        << rmi.procid() << ": Starting iteration: " << iteration_counter
        << std::endl;
      last_print = elapsed_seconds();
    }

    // Reset Active vertices ----------------------------------------------
    // Clear the active super-step and minor-step bits which will
    // be set upon receiving messages
    active_superstep.clear();
    active_minorstep.clear();
    has_gather_accum.clear();
    rmi.barrier();

    // Exchange Messages --------------------------------------------------
    // Exchange any messages in the local message vectors
    // if (rmi.procid() == 0) std::cout << "Exchange messages..." << std::endl;
    run_synchronous( &synchronous_engine::exchange_messages );
    /**
     * Post conditions:
     *   1) only master vertices have messages
     */

    // Receive Messages ---------------------------------------------------
    // Receive messages to master vertices and then synchronize
    // vertex programs with mirrors if gather is required
    //
    // if (rmi.procid() == 0) std::cout << "Receive messages..." << std::endl;
    num_active_vertices = 0;
    run_synchronous( &synchronous_engine::receive_messages );
    if (sched_allv) {
      active_minorstep.fill();
    }
    has_message.clear();
    /**
     * Post conditions:
     *   1) there are no messages remaining
     *   2) All masters that received messages have their
     *      active_superstep bit set
     *   3) All masters and mirrors that are to participate in the
     *      next gather phases have their active_minorstep bit
     *      set.
     *   4) num_active_vertices is the number of vertices that
     *      received messages.
     */

    // Check termination condition ---------------------------------------
    size_t total_active_vertices = num_active_vertices;
    rmi.all_reduce(total_active_vertices);
    if (rmi.procid() == 0 && print_this_round)
      logstream(LOG_EMPH)
        << "\tActive vertices: " << total_active_vertices << std::endl;
    if(total_active_vertices == 0 ) {
      termination_reason = execution_status::TASK_DEPLETION;
      break;
    }

    // Execute gather operations-------------------------------------------
    // Execute the gather operation for all vertices that are active
    // in this minor-step (active-minorstep bit set).
    // if (rmi.procid() == 0) std::cout << "Gathering..." << std::endl;
    run_synchronous( &synchronous_engine::execute_gathers );
    // Clear the minor step bit since only super-step vertices
    // (only master vertices are required to participate in the
    // apply step)
    active_minorstep.clear(); // rmi.barrier();
    /**
     * Post conditions:
     *   1) gather_accum for all master vertices contains the
     *      result of all the gathers (even if they are drawn from
     *      cache)
     *   2) No minor-step bits are set
     */

    // Execute Apply Operations -------------------------------------------
    // Run the apply function on all active vertices
    // if (rmi.procid() == 0) std::cout << "Applying..." << std::endl;
    run_synchronous( &synchronous_engine::execute_applys );
    /**
     * Post conditions:
     *   1) any changes to the vertex data have been synchronized
     *      with all mirrors.
     *   2) all gather accumulators have been cleared
     *   3) If a vertex program is participating in the scatter
     *      phase its minor-step bit has been set to active (both
     *      masters and mirrors) and the vertex program has been
     *      synchronized with the mirrors.
     */

    // Execute Scatter Operations -----------------------------------------
    // Execute each of the scatters on all minor-step active vertices.
    run_synchronous( &synchronous_engine::execute_scatters );
    /**
     * Post conditions:
     *   1) NONE
     */
    if(rmi.procid() == 0 && print_this_round)
      logstream(LOG_EMPH) << "\t Running Aggregators" << std::endl;
    // probe the aggregator
    aggregator.tick_synchronous();

    ++iteration_counter;

    if (snapshot_interval > 0 &&
        iteration_counter % snapshot_interval == 0) {
      graph.save_binary(snapshot_path);
    }
  }

  if (rmi.procid() == 0) {
    logstream(LOG_EMPH) << iteration_counter
                        << " iterations completed." << std::endl;
  }
  // Final barrier to ensure that all engines terminate at the same time
  double total_compute_time = 0;
  for (size_t i = 0;i < per_thread_compute_time.size(); ++i) {
    total_compute_time += per_thread_compute_time[i];
  }
  std::vector<double> all_compute_time_vec(rmi.numprocs());
  all_compute_time_vec[rmi.procid()] = total_compute_time;
  rmi.all_gather(all_compute_time_vec);

  // Aggregate the global apply count across all processes.
  size_t global_completed = completed_applys;
  rmi.all_reduce(global_completed);
  completed_applys = global_completed;
  rmi.cout() << "Updates: " << completed_applys.value << "\n";

  if (rmi.procid() == 0) {
    logstream(LOG_INFO) << "Compute Balance: ";
    for (size_t i = 0;i < all_compute_time_vec.size(); ++i) {
      logstream(LOG_INFO) << all_compute_time_vec[i] << " ";
    }
    logstream(LOG_INFO) << std::endl;
  }
  rmi.full_barrier();
  // Stop the aggregator
  aggregator.stop();
  // return the final reason for termination
  return termination_reason;
} // end of start


// Flush all messages held on mirror vertices to their masters.
template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
exchange_messages(const size_t thread_id) {
  context_type context(*this, graph);
  const size_t TRY_RECV_MOD = 100;
  size_t
vcount = 0;
  fixed_dense_bitset<8 * sizeof(size_t)> local_bitset; // a word-size = 64 bit
  while (1) {
    // increment by a word at a time
    lvid_type lvid_block_start =
        shared_lvid_counter.inc_ret_last(8 * sizeof(size_t));
    if (lvid_block_start >= graph.num_local_vertices()) break;
    // get the bit field from has_message
    size_t lvid_bit_block = has_message.containing_word(lvid_block_start);
    if (lvid_bit_block == 0) continue;
    // initialize a word sized bitfield
    local_bitset.clear();
    local_bitset.initialize_from_mem(&lvid_bit_block, sizeof(size_t));
    foreach(size_t lvid_block_offset, local_bitset) {
      lvid_type lvid = lvid_block_start + lvid_block_offset;
      if (lvid >= graph.num_local_vertices()) break;
      // if the vertex is not local and has a message send the
      // message and clear the bit
      if(!graph.l_is_master(lvid)) {
        sync_message(lvid, thread_id);
        has_message.clear_bit(lvid);
        // clear the message to save memory
        messages[lvid] = message_type();
      }
      // Periodically drain the incoming message buffer.
      if(++vcount % TRY_RECV_MOD == 0) recv_messages();
    }
  } // end of loop over vertices to send messages
  message_exchange.partial_flush();
  // Finish sending and receiving all messages
  thread_barrier.wait();
  if(thread_id == 0) message_exchange.flush();
  thread_barrier.wait();
  recv_messages();
} // end of exchange_messages


// Deliver pending messages to master vertices: activate the vertex,
// initialize its vertex program, and (if a gather is required) mark the
// minor-step bit and ship the program to the mirrors.
template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
receive_messages(const size_t thread_id) {
  context_type context(*this, graph);
  const size_t TRY_RECV_MOD = 100;
  size_t vcount = 0;
  size_t nactive_inc = 0;
  fixed_dense_bitset<8 * sizeof(size_t)> local_bitset; // a word-size = 64 bit
  while (1) {
    // increment by a word at a time
    lvid_type lvid_block_start =
        shared_lvid_counter.inc_ret_last(8 * sizeof(size_t));
    if (lvid_block_start >= graph.num_local_vertices()) break;
    // get the bit field from has_message
    size_t lvid_bit_block = has_message.containing_word(lvid_block_start);
    if (lvid_bit_block == 0) continue;
    // initialize a word sized bitfield
    local_bitset.clear();
    local_bitset.initialize_from_mem(&lvid_bit_block, sizeof(size_t));

    foreach(size_t lvid_block_offset, local_bitset) {
      lvid_type lvid = lvid_block_start + lvid_block_offset;
      if (lvid >= graph.num_local_vertices()) break;
      // if this is the master of lvid and we have a message
      if(graph.l_is_master(lvid)) {
        // The vertex becomes active for this superstep
        active_superstep.set_bit(lvid);
        ++nactive_inc;
        // Pass the message to the vertex program
        vertex_type vertex = vertex_type(graph.l_vertex(lvid));
        vertex_programs[lvid].init(context, vertex, messages[lvid]);
        // clear the message to save memory
        messages[lvid] = message_type();
        if (sched_allv) continue;
        // Determine if the gather should be run
        const vertex_program_type& const_vprog = vertex_programs[lvid];
        const vertex_type const_vertex = vertex;
        if(const_vprog.gather_edges(context, const_vertex) !=
           graphlab::NO_EDGES) {
          active_minorstep.set_bit(lvid);
          sync_vertex_program(lvid, thread_id);
        }
      }
      // Periodically drain incoming vertex programs.
      if(++vcount % TRY_RECV_MOD == 0) recv_vertex_programs();
    }
  }

  num_active_vertices += nactive_inc;
  vprog_exchange.partial_flush();
  // Flush the buffer and finish receiving any remaining vertex
  // programs.
  thread_barrier.wait();
  if(thread_id == 0) {
    vprog_exchange.flush();
  }
  thread_barrier.wait();
  recv_vertex_programs();
} // end of receive messages


// Run the gather phase over all minor-step-active vertices, drawing from
// the gather cache when possible.
template<typename VertexProgram>
void synchronous_engine<VertexProgram>::
execute_gathers(const size_t thread_id) {
  context_type context(*this, graph);
  const size_t TRY_RECV_MOD = 1000;
  size_t vcount = 0;
  const bool caching_enabled = !gather_cache.empty();
  timer ti;

  fixed_dense_bitset<8 * sizeof(size_t)> local_bitset; // a word-size = 64 bit
  while (1) {
    // increment by a word at a time
    lvid_type lvid_block_start =
        shared_lvid_counter.inc_ret_last(8 * sizeof(size_t));
    if (lvid_block_start >= graph.num_local_vertices()) break;
    // get the bit field from has_message
    size_t lvid_bit_block = active_minorstep.containing_word(lvid_block_start);
    if (lvid_bit_block == 0) continue;
    // initialize a word sized bitfield
    local_bitset.clear();
    local_bitset.initialize_from_mem(&lvid_bit_block, sizeof(size_t));

    foreach(size_t lvid_block_offset, local_bitset) {
      lvid_type lvid = lvid_block_start + lvid_block_offset;
      if (lvid >= graph.num_local_vertices()) break;

      bool accum_is_set = false;
      gather_type accum = gather_type();
      // if caching is enabled and we have a cache entry then use
      // that as the accum
      if( caching_enabled && has_cache.get(lvid) ) {
        accum = gather_cache[lvid];
        accum_is_set = true;
      } else {
        // recompute the local contribution to the gather
        const vertex_program_type& vprog = vertex_programs[lvid];
        local_vertex_type local_vertex = graph.l_vertex(lvid);
        const vertex_type vertex(local_vertex);
        const edge_dir_type gather_dir = vprog.gather_edges(context, vertex);
        // Loop over in edges
        size_t edges_touched = 0;
        vprog.pre_local_gather(accum);
        if(gather_dir == IN_EDGES || gather_dir == ALL_EDGES) {
          foreach(local_edge_type local_edge, local_vertex.in_edges()) {
            edge_type edge(local_edge);
            // elocks[local_edge.id()].lock();
            if(accum_is_set) { // \todo hint likely
              accum += vprog.gather(context, vertex, edge);
            } else {
              accum =
vprog.gather(context, vertex, edge); accum_is_set = true; } ++edges_touched; // elocks[local_edge.id()].unlock(); } } // end of if in_edges/all_edges // Loop over out edges if(gather_dir == OUT_EDGES || gather_dir == ALL_EDGES) { foreach(local_edge_type local_edge, local_vertex.out_edges()) { edge_type edge(local_edge); // elocks[local_edge.id()].lock(); if(accum_is_set) { // \todo hint likely accum += vprog.gather(context, vertex, edge); } else { accum = vprog.gather(context, vertex, edge); accum_is_set = true; } // elocks[local_edge.id()].unlock(); ++edges_touched; } INCREMENT_EVENT(EVENT_GATHERS, edges_touched); } // end of if out_edges/all_edges vprog.post_local_gather(accum); // If caching is enabled then save the accumulator to the // cache for future iterations. Note that it is possible // that the accumulator was never set in which case we are // effectively "zeroing out" the cache. if(caching_enabled && accum_is_set) { gather_cache[lvid] = accum; has_cache.set_bit(lvid); } // end of if caching enabled } // If the accum contains a value for the local gather we put // that estimate in the gather exchange. 
if(accum_is_set) sync_gather(lvid, accum, thread_id); if(!graph.l_is_master(lvid)) { // if this is not the master clear the vertex program vertex_programs[lvid] = vertex_program_type(); } // try to recv gathers if there are any in the buffer if(++vcount % TRY_RECV_MOD == 0) recv_gathers(); } } // end of loop over vertices to compute gather accumulators per_thread_compute_time[thread_id] += ti.current_time(); gather_exchange.partial_flush(); // Finish sending and receiving all gather operations thread_barrier.wait(); if(thread_id == 0) gather_exchange.flush(); thread_barrier.wait(); recv_gathers(); } // end of execute_gathers template<typename VertexProgram> void synchronous_engine<VertexProgram>:: execute_applys(const size_t thread_id) { context_type context(*this, graph); const size_t TRY_RECV_MOD = 1000; size_t vcount = 0; timer ti; fixed_dense_bitset<8 * sizeof(size_t)> local_bitset; // allocate a word size = 64bits while (1) { // increment by a word at a time lvid_type lvid_block_start = shared_lvid_counter.inc_ret_last(8 * sizeof(size_t)); if (lvid_block_start >= graph.num_local_vertices()) break; // get the bit field from has_message size_t lvid_bit_block = active_superstep.containing_word(lvid_block_start); if (lvid_bit_block == 0) continue; // initialize a word sized bitfield local_bitset.clear(); local_bitset.initialize_from_mem(&lvid_bit_block, sizeof(size_t)); foreach(size_t lvid_block_offset, local_bitset) { lvid_type lvid = lvid_block_start + lvid_block_offset; if (lvid >= graph.num_local_vertices()) break; // Only master vertices can be active in a super-step ASSERT_TRUE(graph.l_is_master(lvid)); vertex_type vertex(graph.l_vertex(lvid)); // Get the local accumulator. Note that it is possible that // the gather_accum was not set during the gather. 
const gather_type& accum = gather_accum[lvid];
        INCREMENT_EVENT(EVENT_APPLIES, 1);
        vertex_programs[lvid].apply(context, vertex, accum);
        // record an apply as a completed task
        ++completed_applys;
        // Clear the accumulator to save some memory
        gather_accum[lvid] = gather_type();
        // synchronize the changed vertex data with all mirrors
        sync_vertex_data(lvid, thread_id);
        // Determine if a scatter operation is needed; if so, keep the
        // vertex active for the minor (scatter) step and replicate the
        // vertex program to the mirrors so they can scatter too.
        const vertex_program_type& const_vprog = vertex_programs[lvid];
        const vertex_type const_vertex = vertex;
        if(const_vprog.scatter_edges(context, const_vertex) !=
           graphlab::NO_EDGES) {
          active_minorstep.set_bit(lvid);
          sync_vertex_program(lvid, thread_id);
        } else {
          // we are done so clear the vertex program
          vertex_programs[lvid] = vertex_program_type();
        }
        // try to receive vertex data if there is any in the buffer
        if(++vcount % TRY_RECV_MOD == 0) {
          recv_vertex_programs();
          recv_vertex_data();
        }
      }
    } // end of loop over vertices to run apply
    per_thread_compute_time[thread_id] += ti.current_time();
    vprog_exchange.partial_flush();
    vdata_exchange.partial_flush();
    // Finish sending and receiving all changes due to apply operations
    thread_barrier.wait();
    if(thread_id == 0) {
      vprog_exchange.flush();
      vdata_exchange.flush();
    }
    thread_barrier.wait();
    recv_vertex_programs();
    recv_vertex_data();
  } // end of execute_applys

  /**
   * Runs the scatter phase for this thread's share of the vertices
   * marked active in the minor step (masters and mirrors alike).
   * Work is claimed a machine word (64 active-bits) at a time from
   * shared_lvid_counter.
   */
  template<typename VertexProgram>
  void synchronous_engine<VertexProgram>::
  execute_scatters(const size_t thread_id) {
    context_type context(*this, graph);
    timer ti;
    fixed_dense_bitset<8 * sizeof(size_t)> local_bitset; // allocate a word size = 64 bits
    while (1) {
      // increment by a word at a time
      lvid_type lvid_block_start =
        shared_lvid_counter.inc_ret_last(8 * sizeof(size_t));
      if (lvid_block_start >= graph.num_local_vertices()) break;
      // get the bit field of the active vertices in this word
      size_t lvid_bit_block = active_minorstep.containing_word(lvid_block_start);
      if (lvid_bit_block == 0) continue;
      // initialize a word sized bitfield
      local_bitset.clear();
      local_bitset.initialize_from_mem(&lvid_bit_block,
sizeof(size_t)); foreach(size_t lvid_block_offset, local_bitset) { lvid_type lvid = lvid_block_start + lvid_block_offset; if (lvid >= graph.num_local_vertices()) break; const vertex_program_type& vprog = vertex_programs[lvid]; local_vertex_type local_vertex = graph.l_vertex(lvid); const vertex_type vertex(local_vertex); const edge_dir_type scatter_dir = vprog.scatter_edges(context, vertex); size_t edges_touched = 0; // Loop over in edges if(scatter_dir == IN_EDGES || scatter_dir == ALL_EDGES) { foreach(local_edge_type local_edge, local_vertex.in_edges()) { edge_type edge(local_edge); // elocks[local_edge.id()].lock(); vprog.scatter(context, vertex, edge); // elocks[local_edge.id()].unlock(); } ++edges_touched; } // end of if in_edges/all_edges // Loop over out edges if(scatter_dir == OUT_EDGES || scatter_dir == ALL_EDGES) { foreach(local_edge_type local_edge, local_vertex.out_edges()) { edge_type edge(local_edge); // elocks[local_edge.id()].lock(); vprog.scatter(context, vertex, edge); // elocks[local_edge.id()].unlock(); } ++edges_touched; } // end of if out_edges/all_edges INCREMENT_EVENT(EVENT_SCATTERS, edges_touched); // Clear the vertex program vertex_programs[lvid] = vertex_program_type(); } // end of if active on this minor step } // end of loop over vertices to complete scatter operation per_thread_compute_time[thread_id] += ti.current_time(); } // end of execute_scatters // Data Synchronization =================================================== template<typename VertexProgram> void synchronous_engine<VertexProgram>:: sync_vertex_program(lvid_type lvid, const size_t thread_id) { ASSERT_TRUE(graph.l_is_master(lvid)); const vertex_id_type vid = graph.global_vid(lvid); local_vertex_type vertex = graph.l_vertex(lvid); foreach(const procid_t& mirror, vertex.mirrors()) { vprog_exchange.send(mirror, std::make_pair(vid, vertex_programs[lvid])); } } // end of sync_vertex_program template<typename VertexProgram> void synchronous_engine<VertexProgram>:: 
recv_vertex_programs() {
    typename vprog_exchange_type::recv_buffer_type recv_buffer;
    while(vprog_exchange.recv(recv_buffer)) {
      for (size_t i = 0;i < recv_buffer.size(); ++i) {
        typename vprog_exchange_type::buffer_type& buffer = recv_buffer[i].buffer;
        foreach(const vid_prog_pair_type& pair, buffer) {
          const lvid_type lvid = graph.local_vid(pair.first);
          //      ASSERT_FALSE(graph.l_is_master(lvid));
          // Install the master's vertex program on this mirror and mark
          // it active so it participates in the scatter minor-step.
          vertex_programs[lvid] = pair.second;
          active_minorstep.set_bit(lvid);
        }
      }
    }
  } // end of recv vertex programs

  /**
   * Sends the master's (possibly just applied) vertex data for lvid to
   * every mirror via the vdata exchange.  Must be called on the master.
   */
  template<typename VertexProgram>
  void synchronous_engine<VertexProgram>::
  sync_vertex_data(lvid_type lvid, const size_t thread_id) {
    ASSERT_TRUE(graph.l_is_master(lvid));
    const vertex_id_type vid = graph.global_vid(lvid);
    local_vertex_type vertex = graph.l_vertex(lvid);
    foreach(const procid_t& mirror, vertex.mirrors()) {
      vdata_exchange.send(mirror, std::make_pair(vid, vertex.data()));
    }
  } // end of sync_vertex_data

  /**
   * Drains the vdata exchange, overwriting each local mirror's vertex
   * data with the value received from its master.
   */
  template<typename VertexProgram>
  void synchronous_engine<VertexProgram>::
  recv_vertex_data() {
    typename vdata_exchange_type::recv_buffer_type recv_buffer;
    while(vdata_exchange.recv(recv_buffer)) {
      for (size_t i = 0;i < recv_buffer.size(); ++i) {
        typename vdata_exchange_type::buffer_type& buffer = recv_buffer[i].buffer;
        foreach(const vid_vdata_pair_type& pair, buffer) {
          const lvid_type lvid = graph.local_vid(pair.first);
          ASSERT_FALSE(graph.l_is_master(lvid));
          graph.l_vertex(lvid).data() = pair.second;
        }
      }
    }
  } // end of recv vertex data

  /**
   * Folds a thread's partial gather accumulator into the vertex's
   * global accumulator: if this machine holds the master, combine
   * under the per-vertex lock; otherwise ship the partial result to
   * the master via the gather exchange.
   */
  template<typename VertexProgram>
  void synchronous_engine<VertexProgram>::
  sync_gather(lvid_type lvid, const gather_type& accum, const size_t thread_id) {
    if(graph.l_is_master(lvid)) {
      vlocks[lvid].lock();
      if(has_gather_accum.get(lvid)) {
        gather_accum[lvid] += accum;
      } else {
        gather_accum[lvid] = accum;
        has_gather_accum.set_bit(lvid);
      }
      vlocks[lvid].unlock();
    } else {
      const procid_t master = graph.l_master(lvid);
      const vertex_id_type vid = graph.global_vid(lvid);
      gather_exchange.send(master, std::make_pair(vid, accum));
    }
  } // end of sync_gather
// (end of sync_gather)

  /**
   * Drains the gather exchange on the master side: each received
   * (vid, accum) pair is combined into gather_accum under the
   * per-vertex lock, using += when an accumulator is already present.
   */
  template<typename VertexProgram>
  void synchronous_engine<VertexProgram>::
  recv_gathers() {
    typename gather_exchange_type::recv_buffer_type recv_buffer;
    while(gather_exchange.recv(recv_buffer)) {
      for (size_t i = 0;i < recv_buffer.size(); ++i) {
        typename gather_exchange_type::buffer_type& buffer = recv_buffer[i].buffer;
        foreach(const vid_gather_pair_type& pair, buffer) {
          const lvid_type lvid = graph.local_vid(pair.first);
          const gather_type& accum = pair.second;
          ASSERT_TRUE(graph.l_is_master(lvid));
          vlocks[lvid].lock();
          if( has_gather_accum.get(lvid) ) {
            gather_accum[lvid] += accum;
          } else {
            gather_accum[lvid] = accum;
            has_gather_accum.set_bit(lvid);
          }
          vlocks[lvid].unlock();
        }
      }
    }
  } // end of recv_gather

  /**
   * Forwards the message buffered on a mirror to the vertex's master
   * via the message exchange.  Must be called on a mirror only.
   */
  template<typename VertexProgram>
  void synchronous_engine<VertexProgram>::
  sync_message(lvid_type lvid, const size_t thread_id) {
    ASSERT_FALSE(graph.l_is_master(lvid));
    const procid_t master = graph.l_master(lvid);
    const vertex_id_type vid = graph.global_vid(lvid);
    message_exchange.send(master, std::make_pair(vid, messages[lvid]));
  } // end of send_message

  /**
   * Drains the message exchange on the master side: each received
   * (vid, message) pair is combined into messages[] under the
   * per-vertex lock, using += when a message is already pending.
   */
  template<typename VertexProgram>
  void synchronous_engine<VertexProgram>::
  recv_messages() {
    typename message_exchange_type::recv_buffer_type recv_buffer;
    while(message_exchange.recv(recv_buffer)) {
      for (size_t i = 0;i < recv_buffer.size(); ++i) {
        typename message_exchange_type::buffer_type& buffer = recv_buffer[i].buffer;
        foreach(const vid_message_pair_type& pair, buffer) {
          const lvid_type lvid = graph.local_vid(pair.first);
          ASSERT_TRUE(graph.l_is_master(lvid));
          vlocks[lvid].lock();
          if( has_message.get(lvid) ) {
            messages[lvid] += pair.second;
          } else {
            messages[lvid] = pair.second;
            has_message.set_bit(lvid);
          }
          vlocks[lvid].unlock();
        }
      }
    }
  } // end of recv_messages

// NOTE(review): the extraction appears to show a single "};" here where
// the class close and the namespace close should both appear — verify
// brace balance against the upstream file.
}; // namespace

#include <graphlab/macros_undef.hpp>

#endif


================================================ FILE: src/graphlab/engine/warp_engine.hpp ================================================
/**
 * Copyright (c) 2009 Carnegie Mellon
University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_WARP_ENGINE #define GRAPHLAB_WARP_ENGINE #include <deque> #include <boost/bind.hpp> #include <graphlab/scheduler/ischeduler.hpp> #include <graphlab/scheduler/scheduler_factory.hpp> #include <graphlab/scheduler/get_message_priority.hpp> #include <graphlab/engine/iengine.hpp> #include <graphlab/engine/execution_status.hpp> #include <graphlab/options/graphlab_options.hpp> #include <graphlab/rpc/dc_dist_object.hpp> #include <graphlab/engine/distributed_chandy_misra.hpp> #include <graphlab/engine/message_array.hpp> #include <graphlab/serialization/serialize_to_from_string.hpp> #include <graphlab/util/tracepoint.hpp> #include <graphlab/util/memory_info.hpp> #include <graphlab/util/generics/conditional_addition_wrapper.hpp> #include <graphlab/rpc/distributed_event_log.hpp> #include <graphlab/parallel/fiber_group.hpp> #include <graphlab/parallel/fiber_control.hpp> #include <graphlab/rpc/fiber_async_consensus.hpp> #include <graphlab/aggregation/distributed_aggregator.hpp> #include <graphlab/parallel/fiber_remote_request.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { namespace warp { /** * \ingroup warp * * \brief The warp engine executed update functions * asynchronously and can ensure mutual exclusion such that adjacent vertices * are never executed simultaneously. 
The default mode is "factorized" * consistency in which only individual gathers/applys/ * scatters are guaranteed to be consistent, but this can be strengthened to * provide full mutual exclusion. * * ### Execution Semantics * The update function is a simple user defined function of the type * * \code * void update_function(engine_type::context& context, * graph_type::vertex_type vertex) { * } * \endcode * * Based on a scheduler, update functions are executed on each scheduled * vertex. All computation is performed from within fine-grained threads * called fibers, which allows to create thousands of such fibers, thus * hiding distributed communication latency. * * Within the update function, All blocking warp functions such as * \ref graphlab::warp::map_reduce_neighborhood, * \ref graphlab::warp::transform_neighborhood, and * \ref graphlab::warp::broadcast_neighborhood * can be used to make changes to the graph data, and to schedule other * vertices for computation. * * The engine stops when the scheduler is empty. * * ### Construction * * The warp engine is constructed by passing in a * \ref graphlab::distributed_control object which manages coordination * between engine threads and a \ref graphlab::distributed_graph object * which is the graph on which the engine should be run. * * Computation is initiated by signaling vertices using either * \ref graphlab::warp_engine::signal or * \ref graphlab::warp_engine::signal_all. In either case all * machines should invoke signal or signal all at the same time. Finally, * computation is initiated by calling the * \ref graphlab::warp_engine::start function. 
* * \see warp::map_reduce_neighborhood() * \see warp::transform_neighborhood() * \see warp::broadcast_neighborhood() * * ### Example Usage * * The following is a simple example demonstrating how to use the engine: * \code * #include <graphlab.hpp> * * struct vertex_data { * // code * }; * struct edge_data { * // code * }; * typedef graphlab::distributed_graph<vertex_data, edge_data> graph_type; * typedef graphlab::warp_engine<graph_type> engine_type; * * void pagerank(engine_type::context& context, * graph_type::vertex_type vertex) { * ... * } * * * int main(int argc, char** argv) { * // Initialize control plain using mpi * graphlab::mpi_tools::init(argc, argv); * graphlab::distributed_control dc; * // Parse command line options * graphlab::command_line_options clopts("PageRank algorithm."); * std::string graph_dir; * clopts.attach_option("graph", graph_dir, * "The graph file."); * if(!clopts.parse(argc, argv)) { * std::cout << "Error in parsing arguments." << std::endl; * return EXIT_FAILURE; * } * graph_type graph(dc, clopts); * graph.load_structure(graph_dir, "tsv"); * graph.finalize(); * std::cout << "#vertices: " << graph.num_vertices() * << " #edges:" << graph.num_edges() << std::endl; * engine_type engine(dc, graph, clopts); * engine.set_update_function(pagerank); * engine.signal_all(); * engine.start(); * std::cout << "Runtime: " << engine.elapsed_seconds(); * graphlab::mpi_tools::finalize(); * } * \endcode * * * <a name=engineopts>Engine Options</a> * ========================= * The warp engine supports several engine options which can * be set as command line arguments using \c --engine_opts : * * \li \b timeout (default: infinity) Maximum time in seconds the engine will * run for. The actual runtime may be marginally greater as the engine * waits for all threads and processes to flush all active tasks before * returning. 
* \li \b factorized (default: true) Set to true to weaken the consistency * model to factorized consistency where only individual gather/apply/scatter * calls are guaranteed to be locally consistent. Can produce massive * increases in throughput at a consistency penalty. * \li \b nfibers (default: 10000) Number of fibers to use * \li \b stacksize (default: 16384) Stacksize of each fiber. */ template <typename GraphType, typename MessageType = graphlab::empty> class warp_engine { public: /** * \brief The user defined message type used to signal neighboring * vertex programs. */ typedef MessageType message_type; /** * The type of the graph associated with this engine. */ typedef GraphType graph_type; /** * \brief The type of data associated with each vertex in the graph * * The vertex data type must be \ref sec_serializable. */ typedef typename graph_type::vertex_data_type vertex_data_type; /** * \brief The type of data associated with each edge in the graph * * The edge data type must be \ref sec_serializable. */ typedef typename graph_type::edge_data_type edge_data_type; /** * \brief The type used to represent a vertex in the graph. * See \ref graphlab::distributed_graph::vertex_type for details * * The vertex type contains the function * \ref graphlab::distributed_graph::vertex_type::data which * returns a reference to the vertex data as well as other functions * like \ref graphlab::distributed_graph::vertex_type::num_in_edges * which returns the number of in edges. * */ typedef typename graph_type::vertex_type vertex_type; /** * \brief The type used to represent an edge in the graph. * See \ref graphlab::distributed_graph::edge_type for details. * * The edge type contains the function * \ref graphlab::distributed_graph::edge_type::data which returns a * reference to the edge data. In addition the edge type contains * the function \ref graphlab::distributed_graph::edge_type::source and * \ref graphlab::distributed_graph::edge_type::target. 
* */ typedef typename graph_type::edge_type edge_type; struct context { typedef warp_engine engine_type; typedef typename engine_type::graph_type graph_type; typedef typename graph_type::vertex_type vertex_type; typedef typename graph_type::edge_type edge_type; typedef typename graph_type::local_vertex_type local_vertex_type; warp_engine& engine; graph_type& graph; std::string original_value; vertex_type vtx; bool vtx_set; context(warp_engine& engine, graph_type& graph, vertex_type vtx): engine(engine), graph(graph), vtx(vtx), vtx_set(true) { set_synchronized(); } context(warp_engine& engine, graph_type& graph): engine(engine), graph(graph), vtx(graph, 0), vtx_set(false) { } /** * \brief Get the total number of vertices in the graph. * * \return the total number of vertices in the entire graph. */ size_t num_vertices() const { return graph.num_vertices(); } /** * \brief Get the number of edges in the graph. * * Each direction counts as a separate edge. * * \return the total number of edges in the entire graph. */ size_t num_edges() const { return graph.num_edges(); } /** * \brief Get the id of this process. * * The procid is a number between 0 and * \ref graphlab::icontext::num_procs * * \warning Each process may have many threads * * @return the process of this machine. */ size_t procid() const { return graph.procid(); } /** * \brief Get the number of processes in the current execution. * * This is typically the number of mpi jobs created: * \code * %> mpiexec -n 16 ./pagerank * \endcode * would imply that num_procs() returns 16. * * @return the number of processes in the current execution */ size_t num_procs() const { return graph.numprocs(); } /** * \brief Returns a standard output object (like cout) * which only prints once even when running distributed. * * This returns a C++ standard output stream object * which maps directly to std::cout on machine with * process ID 0, and to empty output streamss * on all other processes. 
Calling,
     * \code
     * context.cout() << "Hello World!";
     * \endcode
     * will therefore only print if the code is run on machine 0.
     * This is useful in the finalize operation in aggregators.
     */
    std::ostream& cout() const { return graph.dc().cout(); }

    /**
     * \brief Returns a standard error object (like cerr)
     * which only prints once even when running distributed.
     *
     * This returns a C++ standard output stream object
     * which maps directly to std::cerr on machine with
     * process ID 0, and to empty output streams
     * on all other processes. Calling,
     * \code
     * context.cerr() << "Hello World!";
     * \endcode
     * will therefore only print if the code is run on machine 0.
     * This is useful in the finalize operation in aggregators.
     */
    std::ostream& cerr() const { return graph.dc().cerr(); }

    /**
     * \brief Get the elapsed time in seconds since start was called.
     *
     * \return runtime in seconds
     */
    float elapsed_seconds() const { return engine.elapsed_seconds(); }

    /**
     * \brief Return the current iteration number (if supported).
     *
     * \return the current iteration number if supported, or -1
     * otherwise.
     */
    int iteration() const { return -1; }

    /**
     * \brief Signal the engine to stop executing additional update
     * functions.
     *
     * \warning The execution engine will stop *eventually* and
     * additional update functions may be executed prior to when the
     * engine stops. For example the synchronous engine (see \ref
     * synchronous_engine) will complete the current super-step before
     * terminating.
     */
    void stop() { engine.internal_stop(); }

    /**
     * \brief Signal a vertex with a particular message.
     *
     * This function is an essential part of the GraphLab abstraction
     * and is used to encode iterative computation. Typically a vertex
     * program will signal neighboring vertices during the scatter
     * phase. A vertex program may choose to signal neighbors when
     * changes made during the previous phases break invariants or warrant
     * future computation on neighboring vertices.
     *
     * The signal function takes two arguments.
The first is mandatory * and specifies which vertex to signal. The second argument is * optional and is used to send a message. If no message is * provided then the default message is used. * * \param vertex [in] The vertex to send the message to * \param message [in] The message to send, defaults to message_type(). */ void signal(const vertex_type& vertex, const message_type& message = message_type()) { engine.internal_signal(vertex, message); } /** * \brief Signal an arbitrary vertex ID with a particular message. * * This function is an essential part of the GraphLab abstraction * and is used to encode iterative computation. Typically a vertex * program will signal neighboring vertices during the scatter * phase. A vertex program may choose to signal neighbors on when * changes made during the previous phases break invariants or warrant * future computation on neighboring vertices. * * The signal function takes two arguments. The first is mandatory * and specifies which vertex to signal. The second argument is * optional and is used to send a message. If no message is * provided then the default message is used. * * \param vertex [in] The vertex to send the message to * \param message [in] The message to send, defaults to message_type(). */ void signal(vertex_id_type gvid, const message_type& message = message_type()) { engine.internal_signal_gvid(gvid, message); } /** * \internal * \brief Flags that this vertex was synchronized. */ void set_synchronized() { if (vtx_set && graph.l_is_master(vtx.local_id())) { original_value = serialize_to_string(vtx.data()); } } /** * \brief Synchronizes all copies of this vertex * * If the current vertex value has changed, copy the vertex value to * all mirrors. This is for advanced use! * Under most circumstances you should not need to use * this function directly. 
*/ void synchronize() { if (vtx_set && graph.l_is_master(vtx.local_id())) { std::string new_value = serialize_to_string(vtx.data()); if (original_value != new_value) { // synchronize this vertex's value engine.synchronize_one_vertex_wait(vtx); } std::swap(original_value, new_value); } } }; /** * The type of the context. */ typedef context context_type; /// \internal typedef context icontext_type; /// The type of the update function typedef boost::function<void(context_type&, vertex_type)> update_function_type; private: /// \internal \brief The base type of all schedulers message_array<message_type> messages; // context needs access to internal functions friend struct context; /// \internal \brief The type used to refer to vertices in the local graph typedef typename graph_type::local_vertex_type local_vertex_type; /// \internal \brief The type used to refer to edges in the local graph typedef typename graph_type::local_edge_type local_edge_type; /// \internal \brief The type used to refer to vertex IDs in the local graph typedef typename graph_type::lvid_type lvid_type; /// \internal \brief The type of the current engine instantiation typedef warp_engine engine_type; /// The RPC interface dc_dist_object<warp_engine> rmi; /// A reference to the active graph graph_type& graph; /// A pointer to the lock implementation distributed_chandy_misra<graph_type>* cmlocks; /// Per vertex data locks std::vector<simple_spinlock> vertexlocks; /** * \brief A bit indicating if the local gather for that vertex is * available. */ dense_bitset has_cache; /// Engine threads. fiber_group thrgroup; //! 
The scheduler ischeduler* scheduler_ptr; typedef distributed_aggregator<graph_type, context_type> aggregator_type; aggregator_type aggregator; /// Number of kernel threads size_t ncpus; /// Size of each fiber stack size_t stacksize; /// Number of fibers size_t nfibers; /// set to true if engine is started bool started; /// A pointer to the distributed consensus object fiber_async_consensus* consensus; /** * Used only by the locking subsystem. * to allow the fiber to go to sleep when waiting for the locks to * be ready. */ struct vertex_fiber_cm_handle { mutex lock; bool philosopher_ready; size_t fiber_handle; }; std::vector<vertex_fiber_cm_handle*> cm_handles; dense_bitset program_running; dense_bitset hasnext; // Various counters. atomic<uint64_t> programs_executed; timer launch_timer; /// Defaults to (-1), defines a timeout size_t timed_termination; /// engine option. Sets to true if factorized consistency is used bool factorized_consistency; bool endgame_mode; /// Time when engine is started float engine_start_time; /// True when a force stop is triggered (possibly via a timeout) bool force_stop; graphlab_options opts_copy; // local copy of options to pass to // scheduler construction execution_status::status_enum termination_reason; std::vector<mutex> aggregation_lock; std::vector<std::deque<std::string> > aggregation_queue; update_function_type update_fn; public: /** * Constructs an asynchronous consistent distributed engine. * The number of threads to create are read from * \ref graphlab_options::get_ncpus "opts.get_ncpus()". The scheduler to * construct is read from * \ref graphlab_options::get_scheduler_type() "opts.get_scheduler_type()". * The default scheduler * is the queued_fifo scheduler. For details on the scheduler types * \see scheduler_types * * See the <a href=#engineopts> main class documentation</a> for the * available engine options. * * \param dc Distributed controller to associate with * \param graph The graph to schedule over. 
The graph must be fully * constructed and finalized. * \param opts A graphlab::graphlab_options object containing options and * parameters for the scheduler and the engine. */ warp_engine(distributed_control &dc, graph_type& graph, const graphlab_options& opts = graphlab_options()) : rmi(dc, this), graph(graph), scheduler_ptr(NULL), aggregator(dc, graph, new context_type(*this, graph)), started(false), engine_start_time(timer::approx_time_seconds()), force_stop(false) { rmi.barrier(); nfibers = 10000; stacksize = 16384; factorized_consistency = true; update_fn = NULL; timed_termination = (size_t)(-1); termination_reason = execution_status::UNSET; set_options(opts); initialize(); rmi.barrier(); } /** \internal * For the warp engine to find the remote instances of this class */ size_t get_rpc_obj_id() { return rmi.get_obj_id(); } private: /** * \internal * Configures the engine with the provided options. * The number of threads to create are read from * opts::get_ncpus(). The scheduler to construct is read from * graphlab_options::get_scheduler_type(). The default scheduler * is the queued_fifo scheduler. 
For details on the scheduler types * \see scheduler_types */ void set_options(const graphlab_options& opts) { rmi.barrier(); ncpus = opts.get_ncpus(); ASSERT_GT(ncpus, 0); aggregation_lock.resize(opts.get_ncpus()); aggregation_queue.resize(opts.get_ncpus()); std::vector<std::string> keys = opts.get_engine_args().get_option_keys(); foreach(std::string opt, keys) { if (opt == "timeout") { opts.get_engine_args().get_option("timeout", timed_termination); if (rmi.procid() == 0) logstream(LOG_EMPH) << "Engine Option: timeout = " << timed_termination << std::endl; } else if (opt == "factorized") { opts.get_engine_args().get_option("factorized", factorized_consistency); if (rmi.procid() == 0) logstream(LOG_EMPH) << "Engine Option: factorized = " << factorized_consistency << std::endl; } else if (opt == "nfibers") { opts.get_engine_args().get_option("nfibers", nfibers); if (rmi.procid() == 0) logstream(LOG_EMPH) << "Engine Option: nfibers = " << nfibers << std::endl; } else if (opt == "stacksize") { opts.get_engine_args().get_option("stacksize", stacksize); if (rmi.procid() == 0) logstream(LOG_EMPH) << "Engine Option: stacksize= " << stacksize << std::endl; } else { logstream(LOG_FATAL) << "Unexpected Engine Option: " << opt << std::endl; } } opts_copy = opts; // set a default scheduler if none if (opts_copy.get_scheduler_type() == "") { opts_copy.set_scheduler_type("queued_fifo"); } // construct scheduler passing in the copy of the options from set_options scheduler_ptr = scheduler_factory:: new_scheduler(graph.num_local_vertices(), opts_copy); rmi.barrier(); // create initial fork arrangement based on the alternate vid mapping if (factorized_consistency == false) { cmlocks = new distributed_chandy_misra<graph_type>(rmi.dc(), graph, boost::bind(&engine_type::lock_ready, this, _1)); } else { cmlocks = NULL; } // construct the termination consensus object consensus = new fiber_async_consensus(rmi.dc(), nfibers); } /** * \internal * Initializes the engine with respect to the 
associated graph. * This call will initialize all internal and scheduling datastructures. * This function must be called prior to any signal function. */ void initialize() { // construct all the required datastructures // deinitialize performs the reverse graph.finalize(); scheduler_ptr->set_num_vertices(graph.num_local_vertices()); messages.resize(graph.num_local_vertices()); vertexlocks.resize(graph.num_local_vertices()); program_running.resize(graph.num_local_vertices()); hasnext.resize(graph.num_local_vertices()); if (!factorized_consistency) { cm_handles.resize(graph.num_local_vertices()); } rmi.barrier(); } public: ~warp_engine() { delete consensus; delete cmlocks; delete scheduler_ptr; } /** * Sets the update function to use for execution. * The update function must be of the type void(context_type&, vertex_type), * but more generally, may be a * boost::function<void(context_type&, vertex_type)> */ void set_update_function(update_function_type update_function) { update_fn = update_function; } /** * \brief Compute the total number of updates (calls to apply) * executed since start was last invoked. * * \return Total number of updates */ size_t num_updates() const { return programs_executed.value; } /** * \brief Get the elapsed time in seconds since start was last * called. * * \return elapsed time in seconds */ float elapsed_seconds() const { return timer::approx_time_seconds() - engine_start_time; } /** * \brief Not meaningful for the asynchronous engine. Returns -1. 
*/ int iteration() const { return -1; } /************************************************************************** * Signaling Interface * **************************************************************************/ private: /** * \internal * This is used to receive a message forwarded from another machine */ void rpc_signal(vertex_id_type vid, const message_type& message) { if (force_stop) return; const lvid_type local_vid = graph.local_vid(vid); double priority; messages.add(local_vid, message, &priority); scheduler_ptr->schedule(local_vid, priority); consensus->cancel(); } /** * \internal * \brief Signals a vertex with an optional message * * Signals a vertex, and schedules it to be executed in the future. * must be called on a vertex accessible by the current machine. */ void internal_signal(const vertex_type& vtx, const message_type& message = message_type()) { if (force_stop) return; if (started) { const typename graph_type::vertex_record& rec = graph.l_get_vertex_record(vtx.local_id()); const procid_t owner = rec.owner; if (endgame_mode) { // fast signal. push to the remote machine immediately if (owner != rmi.procid()) { const vertex_id_type vid = rec.gvid; rmi.remote_call(owner, &engine_type::rpc_signal, vid, message); } else { double priority; messages.add(vtx.local_id(), message, &priority); scheduler_ptr->schedule(vtx.local_id(), priority); consensus->cancel(); } } else { double priority; messages.add(vtx.local_id(), message, &priority); scheduler_ptr->schedule(vtx.local_id(), priority); consensus->cancel(); } } else { double priority; messages.add(vtx.local_id(), message, &priority); scheduler_ptr->schedule(vtx.local_id(), priority); consensus->cancel(); } } // end of schedule /** * \internal * \brief Signals a vertex with an optional message * * Signals a global vid, and schedules it to be executed in the future. * If current machine does not contain the vertex, it is ignored. 
*/ void internal_signal_gvid(vertex_id_type gvid, const message_type& message = message_type()) { if (force_stop) return; if (graph.is_master(gvid)) { internal_signal(graph.vertex(gvid), message); } else { procid_t proc = graph.master(gvid); rmi.remote_call(proc, &warp_engine::internal_signal, gvid, message); } } void rpc_internal_stop() { force_stop = true; termination_reason = execution_status::FORCED_ABORT; } /** * \brief Force engine to terminate immediately. * * This function is used to stop the engine execution by forcing * immediate termination. */ void internal_stop() { for (procid_t i = 0;i < rmi.numprocs(); ++i) { rmi.remote_call(i, &warp_engine::rpc_internal_stop); } } public: /** * \brief Signals single a vertex with an optional message. * * This function sends a message to particular vertex which will * receive that message on start. The signal function must be * invoked on all machines simultaneously. For example: * * \code * graphlab::warp_engine<graph_type> engine(dc, graph, opts); * engine.signal(0); // signal vertex zero * \endcode * * and _not_: * * \code * graphlab::warp_engine<graph_type> engine(dc, graph, opts); * if(dc.procid() == 0) engine.signal(0); // signal vertex zero * \endcode * * Since signal is executed synchronously on all machines it * should only be used to schedule a small set of vertices. The * preferred method to signal a large set of vertices (e.g., all * vertices that are a certain type) is to use either the vertex * program init function or the aggregation framework. 
For
   * example to signal all vertices that have a particular value one
   * could write:
   *
   * \code
   * struct bipartite_opt :
   *   public graphlab::ivertex_program<graph_type, gather_type> {
   *   // The user defined init function
   *   void init(icontext_type& context, vertex_type& vertex) {
   *     // Signal myself if I am a certain type
   *     if(vertex.data().on_left) context.signal(vertex);
   *   }
   *   // other vastly more interesting code
   * };
   * \endcode
   *
   * @param [in] vid the vertex id to signal
   * @param [in] message the message to send to that vertex. The
   * default message is sent if no message is provided.
   * (See ivertex_program::message_type for details about the
   * message_type).
   */
  void signal(vertex_id_type gvid,
              const message_type& message = message_type()) {
    // barriers ensure every machine enters/leaves signal() together,
    // since this must be called collectively on all machines
    rmi.barrier();
    internal_signal_gvid(gvid, message);
    rmi.barrier();
  }

  /**
   * \brief Signal all vertices with a particular message.
   *
   * This function sends the same message to all vertices which will
   * receive that message on start. The signal_all function must be
   * invoked on all machines simultaneously. For example:
   *
   * \code
   * graphlab::warp_engine<graph_type> engine(dc, graph, opts);
   * engine.signal_all(); // signal all vertices
   * \endcode
   *
   * and _not_:
   *
   * \code
   * graphlab::warp_engine<graph_type> engine(dc, graph, opts);
   * if(dc.procid() == 0) engine.signal_all(); // signal vertex zero
   * \endcode
   *
   * The signal_all function is the most common way to send messages
   * to the engine. For example in the pagerank application we want
   * all vertices to be active on the first round. Therefore we
   * would write:
   *
   * \code
   * graphlab::warp_engine<graph_type> engine(dc, graph, opts);
   * engine.signal_all();
   * engine.start();
   * \endcode
   *
   * @param [in] message the message to send to all vertices. The
   * default message is sent if no message is provided
   * (See ivertex_program::message_type for details about the
   * message_type).
*/
  void signal_all(const message_type& message = message_type(),
                  const std::string& order = "shuffle") {
    // delegate to signal_vset over the complete vertex set
    vertex_set vset = graph.complete_set();
    signal_vset(vset, message, order);
  } // end of schedule all

  /**
   * \brief Signal a set of vertices with a particular message.
   *
   * This function sends the same message to a set of vertices which will
   * receive that message on start. The signal_vset function must be
   * invoked on all machines simultaneously. For example:
   *
   * \code
   * graphlab::warp_engine<graph_type> engine(dc, graph, opts);
   * engine.signal_vset(vset); // signal a subset of vertices
   * \endcode
   *
   * signal_all() is conceptually equivalent to:
   *
   * \code
   * engine.signal_vset(graph.complete_set());
   * \endcode
   *
   * @param [in] vset The set of vertices to signal
   * @param [in] message the message to send to all vertices. The
   * default message is sent if no message is provided
   * (See ivertex_program::message_type for details about the
   * message_type).
   * @param [in] order if "shuffle", the local schedule order is randomized
   */
  void signal_vset(const vertex_set& vset,
                   const message_type& message = message_type(),
                   const std::string& order = "shuffle") {
    logstream(LOG_DEBUG) << rmi.procid() << ": Schedule All" << std::endl;
    // allocate a vector with all the local owned vertices
    // and schedule all of them.
    std::vector<vertex_id_type> vtxs;
    vtxs.reserve(graph.num_local_own_vertices());
    for(lvid_type lvid = 0;
        lvid < graph.get_local_graph().num_vertices();
        ++lvid) {
      // only the owning machine schedules a vertex; mirrors skip it
      if (graph.l_vertex(lvid).owner() == rmi.procid() &&
          vset.l_contains(lvid)) {
        vtxs.push_back(lvid);
      }
    }
    if(order == "shuffle") {
      graphlab::random::shuffle(vtxs.begin(), vtxs.end());
    }
    foreach(lvid_type lvid, vtxs) {
      double priority;
      messages.add(lvid, message, &priority);
      scheduler_ptr->schedule(lvid, priority);
    }
    // collective call: everyone waits until all machines have scheduled
    rmi.barrier();
  }

 private:

  /**
   * Gets a task from the scheduler and the associated message.
   * Loops because the scheduler may return a vertex whose message was
   * already consumed; such entries are skipped until either a vertex
   * with a live message is found or the scheduler reports non-NEW_TASK.
   */
  sched_status::status_enum get_next_sched_task(size_t threadid,
                                                lvid_type& lvid,
                                                message_type& msg) {
    while (1) {
      sched_status::status_enum stat =
          scheduler_ptr->get_next(threadid % ncpus, lvid);
      if (stat == sched_status::NEW_TASK) {
        // only accept the task if a message is still pending for it
        if (messages.get(lvid, msg)) return stat;
        else continue;
      }
      return stat;
    }
  }

  /**
   * \internal
   * RPC target: switches this machine into endgame mode and enables
   * fast-tracked RPC requests to reduce latency near termination.
   */
  void set_endgame_mode() {
    if (!endgame_mode) logstream(LOG_EMPH) << "Endgame mode\n";
    endgame_mode = true;
    rmi.dc().set_fast_track_requests(true);
  }

  /**
   * \internal
   * Called when get_a_task returns no internal task not a scheduler task.
   * This rechecks the status of the internal task queue and the scheduler
   * inside a consensus critical section.
*/
  bool try_to_quit(size_t threadid,
                   bool& has_sched_msg,
                   lvid_type& sched_lvid,
                   message_type &msg) {
    // honor the wall-clock timeout if one was configured
    if (timer::approx_time_seconds() - engine_start_time > timed_termination) {
      termination_reason = execution_status::TIMEOUT;
      force_stop = true;
    }
    logstream(LOG_DEBUG) << rmi.procid() << "-" << threadid << ": "
                         << "Termination Attempt " << std::endl;
    has_sched_msg = false;
    fiber_control::yield();
    // re-check the scheduler INSIDE the consensus critical section so that
    // a signal arriving concurrently cannot be missed
    consensus->begin_done_critical_section(threadid);
    sched_status::status_enum stat =
        get_next_sched_task(threadid, sched_lvid, msg);
    if (stat == sched_status::EMPTY || force_stop) {
      logstream(LOG_DEBUG) << rmi.procid() << "-" << threadid << ": "
                           << "\tTermination Double Checked" << std::endl;
      if (!endgame_mode) logstream(LOG_EMPH) << "Endgame mode\n";
      endgame_mode = true;
      // put everyone in endgame
      for (procid_t i = 0;i < rmi.dc().numprocs(); ++i) {
        rmi.remote_call(i, &warp_engine::set_endgame_mode);
      }
      // true => global consensus reached, this fiber may die;
      // false => cancelled by a concurrent signal, keep running
      bool ret = consensus->end_done_critical_section(threadid);
      if (ret == false) {
        logstream(LOG_DEBUG) << rmi.procid() << "-" << threadid << ": "
                             << "\tCancelled" << std::endl;
      } else {
        logstream(LOG_DEBUG) << rmi.procid() << "-" << threadid << ": "
                             << "\tDying"
                             << " (" << fiber_control::get_tid() << ")"
                             << std::endl;
      }
      return ret;
    } else {
      // a task appeared while we were trying to quit: hand it back
      logstream(LOG_DEBUG) << rmi.procid() << "-" << threadid << ": "
                           << "\tCancelled by Scheduler Task" << std::endl;
      consensus->cancel_critical_section(threadid);
      has_sched_msg = true;
      return false;
    }
  } // end of try to quit

  /**
   * \internal
   * When all distributed locks are acquired, this function is called
   * from the chandy misra implementation on the master vertex.
   * Here, we perform initialization
   * of the task and switch the vertex to a gathering state.
   * Marks the philosopher ready and wakes the fiber that is parked in
   * eval_sched_task() waiting on this handle.
   */
  void lock_ready(lvid_type lvid) {
    cm_handles[lvid]->lock.lock();
    cm_handles[lvid]->philosopher_ready = true;
    fiber_control::schedule_tid(cm_handles[lvid]->fiber_handle);
    cm_handles[lvid]->lock.unlock();
  }

  // make sure I am the only person running.
// if returns false, the message has been dropped into the message array. // quit bool get_exclusive_access_to_vertex(const lvid_type lvid, const message_type& msg) { vertexlocks[lvid].lock(); bool someone_else_running = program_running.set_bit(lvid); if (someone_else_running) { // bad. someone else is here. // drop it into the message array messages.add(lvid, msg); hasnext.set_bit(lvid); } vertexlocks[lvid].unlock(); return !someone_else_running; } // make sure I am the only person running. // if returns false, the message has been dropped into the message array. // quit void release_exclusive_access_to_vertex(const lvid_type lvid) { vertexlocks[lvid].lock(); // someone left a next message for me // reschedule it at high priority if (hasnext.get(lvid)) { scheduler_ptr->schedule(lvid, 10000.0); consensus->cancel(); hasnext.clear_bit(lvid); } program_running.clear_bit(lvid); vertexlocks[lvid].unlock(); } void update_vertex_value(vertex_id_type vid, vertex_data_type& vdata) { local_vertex_type lvtx(graph.l_vertex(graph.local_vid(vid))); lvtx.data() = vdata; } void synchronize_one_vertex(vertex_type vtx) { local_vertex_type lvtx(vtx); foreach(procid_t mirror, lvtx.mirrors()) { rmi.remote_call(mirror, &warp_engine::update_vertex_value, vtx.id(), vtx.data()); } } void synchronize_one_vertex_wait(vertex_type vtx) { local_vertex_type lvtx(vtx); std::vector<request_future<void> > futures; foreach(procid_t mirror, lvtx.mirrors()) { futures.push_back(object_fiber_remote_request(rmi, mirror, &warp_engine::update_vertex_value, vtx.id(), vtx.data())); } for (size_t i = 0;i < futures.size(); ++i) { futures[i](); } } /** * \internal * Called when the scheduler returns a vertex to run. * If this function is called with vertex locks acquired, prelocked * should be true. Otherwise it should be false. 
*/
  void eval_sched_task(const lvid_type lvid, const message_type& msg) {
    const typename graph_type::vertex_record& rec =
        graph.l_get_vertex_record(lvid);
    vertex_id_type vid = rec.gvid;
    // if this is another machine's forward it
    if (rec.owner != rmi.procid()) {
      rmi.remote_call(rec.owner, &engine_type::rpc_signal, vid, msg);
      return;
    }
    // I have to run this myself; if another fiber holds the vertex the
    // message has been queued for it and we are done
    if (!get_exclusive_access_to_vertex(lvid, msg)) return;
    /**************************************************************************/
    /*                          Acquire Locks                                 */
    /**************************************************************************/
    if (!factorized_consistency) {
      // begin lock acquisition via the Chandy-Misra philosopher protocol;
      // this fiber parks until lock_ready() marks the handle ready
      cm_handles[lvid] = new vertex_fiber_cm_handle;
      cm_handles[lvid]->philosopher_ready = false;
      cm_handles[lvid]->fiber_handle = fiber_control::get_tid();
      cmlocks->make_philosopher_hungry(lvid);
      cm_handles[lvid]->lock.lock();
      while (!cm_handles[lvid]->philosopher_ready) {
        // atomically releases the mutex and deschedules this fiber
        fiber_control::deschedule_self(&(cm_handles[lvid]->lock.m_mut));
        cm_handles[lvid]->lock.lock();
      }
      cm_handles[lvid]->lock.unlock();
    }
    // run the user update function on this vertex
    local_vertex_type l_vtx(graph.l_vertex(lvid));
    local_vertex_type vtx(l_vtx);
    context ctx(*this, graph, vtx);
    update_fn(ctx, vtx);
    // flush any pending vertex-data changes out to the mirrors
    ctx.synchronize();
    /************************************************************************/
    /*                          Release Locks                               */
    /************************************************************************/
    // cleanup
    if (!factorized_consistency) {
      cmlocks->philosopher_stops_eating(lvid);
      delete cm_handles[lvid];
      cm_handles[lvid] = NULL;
    }
    release_exclusive_access_to_vertex(lvid);
    programs_executed.inc();
  }

  /**
   * \internal
   * Per thread main loop: alternates between servicing asynchronous
   * aggregator ticks, executing scheduler tasks, and attempting
   * distributed termination via try_to_quit().
   */
  void thread_start(size_t threadid) {
    bool has_sched_msg = false;
    std::vector<std::vector<lvid_type> > internal_lvid;
    lvid_type sched_lvid;
    message_type msg;
    float last_aggregator_check = timer::approx_time_seconds();
    timer ti; ti.start();
    while(1) {
      // poll the periodic aggregator roughly once per second
      // (skipped in endgame mode)
      if (timer::approx_time_seconds() != last_aggregator_check &&
          !endgame_mode) {
        last_aggregator_check = timer::approx_time_seconds();
        std::string key = aggregator.tick_asynchronous();
        if (key != "") {
          // broadcast the aggregation key to every worker's queue
          for (size_t i = 0;i < aggregation_lock.size(); ++i) {
            aggregation_lock[i].lock();
            aggregation_queue[i].push_back(key);
            aggregation_lock[i].unlock();
          }
        }
      }
      // test the aggregator: drain this worker's aggregation queue
      while(!aggregation_queue[fiber_control::get_worker_id()].empty()) {
        size_t wid = fiber_control::get_worker_id();
        ASSERT_LT(wid, ncpus);
        aggregation_lock[wid].lock();
        std::string key = aggregation_queue[wid].front();
        aggregation_queue[wid].pop_front();
        aggregation_lock[wid].unlock();
        aggregator.tick_asynchronous_compute(wid, key);
      }
      sched_status::status_enum stat =
          get_next_sched_task(threadid, sched_lvid, msg);
      has_sched_msg = stat != sched_status::EMPTY;
      if (stat != sched_status::EMPTY) {
        eval_sched_task(sched_lvid, msg);
        // in endgame mode flush RPC buffers eagerly to cut latency
        if (endgame_mode) rmi.dc().flush();
      } else if (!try_to_quit(threadid, has_sched_msg, sched_lvid, msg)) {
        /*
         * We failed to obtain a task, try to quit
         * (try_to_quit may hand back a task it found while double-checking)
         */
        if (has_sched_msg) {
          eval_sched_task(sched_lvid, msg);
        }
      } else {
        // consensus reached: no work anywhere, this fiber terminates
        break;
      }
      if (fiber_control::worker_has_priority_fibers_on_queue())
        fiber_control::yield();
    }
  } // end of thread start

  /**************************************************************************
   *                         Main engine start()                            *
   **************************************************************************/
 public:

  /**
   * \brief Start the engine execution.
   *
   * This function starts the engine and does not
   * return until the scheduler has no tasks remaining.
   *
   * \return the reason for termination
   */
  execution_status::status_enum start() {
    // disable fast-track requests for the main run; restored on exit
    bool old_fasttrack = rmi.dc().set_fast_track_requests(false);
    logstream(LOG_INFO) << "Spawning " << nfibers << " threads"
                        << std::endl;
    ASSERT_TRUE(scheduler_ptr != NULL);
    consensus->reset();
    // now. It is of critical importance that we match the number of
    // actual workers
    // start the aggregator
    aggregator.start(ncpus);
    aggregator.aggregate_all_periodic();
    started = true;
    rmi.barrier();
    size_t allocatedmem = memory_info::allocated_bytes();
    rmi.all_reduce(allocatedmem);
    // reset per-run state before launching fibers
    engine_start_time = timer::approx_time_seconds();
    force_stop = false;
    endgame_mode = false;
    programs_executed = 0;
    launch_timer.start();
    termination_reason = execution_status::RUNNING;
    if (rmi.procid() == 0) {
      logstream(LOG_INFO) << "Total Allocated Bytes: " << allocatedmem
                          << std::endl;
    }
    // pin the fiber workers onto the first ncpus cores
    fiber_group::affinity_type affinity;
    affinity.clear();
    for (size_t i = 0; i < ncpus; ++i) {
      affinity.set_bit(i);
    }
    thrgroup.set_affinity(affinity);
    thrgroup.set_stacksize(stacksize);
    for (size_t i = 0; i < nfibers ; ++i) {
      thrgroup.launch(boost::bind(&engine_type::thread_start, this, i));
    }
    thrgroup.join();
    aggregator.stop();
    // if termination reason was not changed, then it must be depletion
    if (termination_reason == execution_status::RUNNING) {
      termination_reason = execution_status::TASK_DEPLETION;
    }
    // aggregate and report global run statistics
    size_t ctasks = programs_executed.value;
    rmi.all_reduce(ctasks);
    programs_executed.value = ctasks;
    rmi.cout() << "Completed Tasks: " << programs_executed.value
               << std::endl;
    size_t numjoins = messages.num_joins();
    rmi.all_reduce(numjoins);
    rmi.cout() << "Schedule Joins: " << numjoins << std::endl;
    size_t numadds = messages.num_adds();
    rmi.all_reduce(numadds);
    rmi.cout() << "Schedule Adds: " << numadds << std::endl;
    ASSERT_TRUE(scheduler_ptr->empty());
    started = false;
    rmi.dc().set_fast_track_requests(old_fasttrack);
    return termination_reason;
  } // end of start

 public:
  // accessor for the engine's aggregator; used by the aggregation API
  aggregator_type* get_aggregator() { return &aggregator; }

}; // end of class
} // namespace warp
} // namespace

#include <graphlab/macros_undef.hpp>
#include <graphlab/engine/warp_graph_broadcast.hpp>
#include <graphlab/engine/warp_graph_mapreduce.hpp>
#include <graphlab/engine/warp_graph_transform.hpp>
#endif
================================================
FILE: src/graphlab/engine/warp_graph_broadcast.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */
#ifndef GRAPHLAB_WARP_GRAPH_BROADCAST_HPP
#define GRAPHLAB_WARP_GRAPH_BROADCAST_HPP
#include <boost/bind.hpp>
#include <graphlab/util/generics/conditional_combiner_wrapper.hpp>
#include <graphlab/parallel/fiber_group.hpp>
#include <graphlab/parallel/fiber_control.hpp>
#include <graphlab/parallel/fiber_remote_request.hpp>
#include <graphlab/logger/assertions.hpp>
#include <graphlab/rpc/dc.hpp>
#include <graphlab/macros_def.hpp>
namespace graphlab {
namespace warp {

namespace warp_impl {

// Implementation of warp::broadcast_neighborhood(): the master vertex fans
// the broadcast function out to every machine holding a replica of the
// vertex, then runs it locally, then waits for all remote calls.
template <typename EngineType, typename GraphType>
struct broadcast_neighborhood_impl {

  typedef typename EngineType::context_type context_type;
  typedef typename GraphType::vertex_type vertex_type;
  typedef typename GraphType::vertex_data_type vertex_data_type;
  typedef typename GraphType::edge_type edge_type;
  typedef typename GraphType::local_vertex_type local_vertex_type;
  typedef typename GraphType::local_edge_type local_edge_type;
  typedef typename GraphType::vertex_record vertex_record;

  /**************************************************************************/
  /*                                                                        */
  /*              Basic Broadcast Neighborhood Implementation               */
  /*                                                                        */
  /**************************************************************************/
  /*
   * The master calls basic_broadcast_neighborhood.
   * Which then issues calls to basic_local_broadcast_neighborhood on each
   * machine with a replica.
   */
  // Runs broadcast_fn on every selected local edge of the vertex. Both
  // endpoint vertex locks are taken in ascending lvid order before each
  // call to avoid lock-order deadlock between concurrent broadcasts.
  static void basic_local_broadcast_neighborhood(context_type& context,
                                                 edge_dir_type edge_direction,
                                                 void(*broadcast_fn)(context_type& context,
                                                                     edge_type edge,
                                                                     vertex_type other),
                                                 vertex_id_type vid) {
    GraphType& graph(context.graph);
    lvid_type lvid = graph.local_vid(vid);
    local_vertex_type local_vertex(context.graph.l_vertex(lvid));
    if(edge_direction == IN_EDGES || edge_direction == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.in_edges()) {
        edge_type edge(local_edge);
        vertex_type other(local_edge.source());
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        // lock smaller lvid first to impose a global lock order
        graph.get_lock_manager()[std::min(a,b)].lock();
        graph.get_lock_manager()[std::max(a,b)].lock();
        broadcast_fn(context, edge, other);
        graph.get_lock_manager()[a].unlock();
        graph.get_lock_manager()[b].unlock();
      }
    }
    // do out edges
    if(edge_direction == OUT_EDGES || edge_direction == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.out_edges()) {
        edge_type edge(local_edge);
        vertex_type other(local_edge.target());
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        graph.get_lock_manager()[std::min(a,b)].lock();
        graph.get_lock_manager()[std::max(a,b)].lock();
        broadcast_fn(context, edge, other);
        graph.get_lock_manager()[a].unlock();
        graph.get_lock_manager()[b].unlock();
      }
    }
  }

  // RPC entry point on mirror machines. objid carries the RPC object ids
  // of (engine, graph); broadcast_ptr is the function pointer shipped as
  // an integer (assumes identical binaries on all machines).
  static void basic_local_broadcast_neighborhood_from_remote(std::pair<size_t, size_t> objid,
                                                             edge_dir_type edge_direction,
                                                             size_t broadcast_ptr,
                                                             vertex_id_type vid,
                                                             vertex_data_type& vdata) {
    EngineType* engine =
        reinterpret_cast<EngineType*>(distributed_control::get_instance()->get_registered_object(objid.first));
    GraphType* graph =
        reinterpret_cast<GraphType*>(distributed_control::get_instance()->get_registered_object(objid.second));
    vertex_type vertex(graph->l_vertex(graph->local_vid(vid)));
    context_type context(*engine, *graph, vertex);
    // refresh the local replica with the master's vertex data first
    vertex.data() = vdata;
    // cast the mappers and combiners back into their pointer types
    void(*broadcast_fn)(context_type&, edge_type edge, vertex_type other) =
        reinterpret_cast<void(*)(context_type&, edge_type, vertex_type)>(broadcast_ptr);
    basic_local_broadcast_neighborhood(context, edge_direction,
                                       broadcast_fn, vid);
  }

  // Entry point called on the master vertex: fan out to mirrors, compute
  // locally, then block on all remote completions.
  static void basic_broadcast_neighborhood(context_type& context,
                                           typename GraphType::vertex_type current,
                                           edge_dir_type edge_direction,
                                           void(*broadcast_fn)(context_type& context,
                                                               edge_type edge,
                                                               vertex_type other)) {
    // get a reference to the graph
    GraphType& graph = current.graph_ref;
    // get the object ID of the graph
    std::pair<size_t, size_t> objid(context.engine.get_rpc_obj_id(),
                                    graph.get_rpc_obj_id());
    typename GraphType::vertex_record vrecord =
        graph.l_get_vertex_record(current.local_id());
    // make sure we are running on a master vertex
    ASSERT_EQ(vrecord.owner, distributed_control::get_instance_procid());
    // create num-mirrors worth of requests
    std::vector<request_future<void > > requests(vrecord.num_mirrors());
    size_t ctr = 0;
    foreach(procid_t proc, vrecord.mirrors()) {
      // issue the communication
      requests[ctr] = fiber_remote_request(proc,
                                           basic_local_broadcast_neighborhood_from_remote,
                                           objid,
                                           edge_direction,
                                           reinterpret_cast<size_t>(broadcast_fn),
                                           current.id(),
                                           current.data());
      ++ctr;
    }
    // compute the local tasks while the remote requests are in flight
    basic_local_broadcast_neighborhood(context, edge_direction,
                                       broadcast_fn, current.id());
    // now, wait for everyone
    for (size_t i = 0;i < requests.size(); ++i) {
      requests[i]();
    }
  }
};

// Variant of broadcast_neighborhood_impl whose broadcast function takes an
// additional user-supplied ExtraArg value.
template <typename EngineType, typename GraphType, typename ExtraArg>
struct broadcast_neighborhood_impl2 {

  typedef typename EngineType::context_type context_type;
  typedef typename GraphType::vertex_type vertex_type;
  typedef typename GraphType::vertex_data_type vertex_data_type;
  typedef typename GraphType::edge_type edge_type;
  typedef typename GraphType::local_vertex_type local_vertex_type;
  typedef typename GraphType::local_edge_type local_edge_type;
  typedef typename GraphType::vertex_record vertex_record;

  /**************************************************************************/
  /*                                                                        */
  /*            Extended Broadcast Neighborhood Implementation              */
  /*                                                                        */
  /**************************************************************************/
  /*
   * The master calls extended_broadcast_neighborhood.
   * Which then issues calls to extended_local_broadcast_neighborhood on each
   * machine with a replica.
   * The extended broadcast neighborhood allows the broadcast function to take
   * an optional argument
   */
  // Same edge loop as the basic variant, threading the extra argument
  // through to broadcast_fn.
  static void extended_local_broadcast_neighborhood(context_type& context,
                                                    edge_dir_type edge_direction,
                                                    void(*broadcast_fn)(context_type& context,
                                                                        edge_type edge,
                                                                        vertex_type other,
                                                                        const ExtraArg extra),
                                                    vertex_id_type vid,
                                                    const ExtraArg extra) {
    GraphType& graph(context.graph);
    lvid_type lvid = graph.local_vid(vid);
    local_vertex_type local_vertex(graph.l_vertex(lvid));
    if(edge_direction == IN_EDGES || edge_direction == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.in_edges()) {
        edge_type edge(local_edge);
        vertex_type other(local_edge.source());
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        // ascending-lvid lock order, as in the basic variant
        graph.get_lock_manager()[std::min(a,b)].lock();
        graph.get_lock_manager()[std::max(a,b)].lock();
        broadcast_fn(context, edge, other, extra);
        graph.get_lock_manager()[a].unlock();
        graph.get_lock_manager()[b].unlock();
      }
    }
    // do out edges
    if(edge_direction == OUT_EDGES || edge_direction == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.out_edges()) {
        edge_type edge(local_edge);
        vertex_type other(local_edge.target());
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        graph.get_lock_manager()[std::min(a,b)].lock();
        graph.get_lock_manager()[std::max(a,b)].lock();
        broadcast_fn(context, edge, other, extra);
        graph.get_lock_manager()[a].unlock();
        graph.get_lock_manager()[b].unlock();
      }
    }
  }

  // RPC entry point on mirror machines for the extended variant.
  static void extended_local_broadcast_neighborhood_from_remote(std::pair<size_t, size_t> objid,
                                                                edge_dir_type edge_direction,
                                                                size_t broadcast_ptr,
                                                                vertex_id_type vid,
                                                                vertex_data_type& vdata,
                                                                const ExtraArg extra) {
    EngineType* engine =
        reinterpret_cast<EngineType*>(distributed_control::get_instance()->get_registered_object(objid.first));
    GraphType* graph =
        reinterpret_cast<GraphType*>(distributed_control::get_instance()->get_registered_object(objid.second));
    vertex_type vertex(graph->l_vertex(graph->local_vid(vid)));
    context_type context(*engine, *graph, vertex);
    // refresh the local replica with the master's vertex data first
    vertex.data() = vdata;
    // cast the mappers and combiners back into their pointer types
    void(*broadcast_fn)(context_type&, edge_type edge, vertex_type other, const ExtraArg) =
        reinterpret_cast<void(*)(context_type&, edge_type, vertex_type, const ExtraArg)>(broadcast_ptr);
    extended_local_broadcast_neighborhood(context, edge_direction,
                                          broadcast_fn, vid, extra);
  }

  // Entry point called on the master vertex for the extended variant.
  static void extended_broadcast_neighborhood(context_type& context,
                                              typename GraphType::vertex_type current,
                                              edge_dir_type edge_direction,
                                              void(*broadcast_fn)(context_type& context,
                                                                  edge_type edge,
                                                                  vertex_type other,
                                                                  const ExtraArg extra),
                                              const ExtraArg extra) {
    // get a reference to the graph
    GraphType& graph = current.graph_ref;
    // get the object ID of the graph
    std::pair<size_t, size_t> objid(context.engine.get_rpc_obj_id(),
                                    graph.get_rpc_obj_id());
    typename GraphType::vertex_record vrecord =
        graph.l_get_vertex_record(current.local_id());
    // make sure we are running on a master vertex
    ASSERT_EQ(vrecord.owner, distributed_control::get_instance_procid());
    // create num-mirrors worth of requests
    std::vector<request_future<void> > requests(vrecord.num_mirrors());
    size_t ctr = 0;
    foreach(procid_t proc, vrecord.mirrors()) {
      // issue the communication
      requests[ctr] = fiber_remote_request(proc,
                                           extended_local_broadcast_neighborhood_from_remote,
                                           objid,
                                           edge_direction,
                                           reinterpret_cast<size_t>(broadcast_fn),
                                           current.id(),
                                           current.data(),
                                           extra);
      ++ctr;
    }
    // compute the local tasks while the remote requests are in flight
    extended_local_broadcast_neighborhood(context, edge_direction,
                                          broadcast_fn, current.id(), extra);
    // now, wait for everyone
    for (size_t i = 0;i < requests.size(); ++i) {
      requests[i]();
    }
  }
};

} // namespace warp::warp_impl

/**
 * \ingroup warp
 *
 * the broadcast_neighborhood function allows a parallel transformation of the
 * neighborhood of a vertex to be performed and also provides a warp_engine
 * context. This is a blocking operation, and
 * will not return until the distributed computation is complete. When run
 * inside a fiber, to hide latency, the system will automatically context
 * switch to evaluate some other fiber which is ready to run. This function
 * is functionally similar to the transform_neighborhood function, but requires
 * a \ref graphlab::warp_engine "warp_engine"
 * context to be provided. The warp_engine context will also be passed on to
 * the transform function.
 *
 * Abstractly, the computation accomplishes the following:
 *
 * \code
 * foreach(edge in neighborhood of current vertex) {
 *   transform_fn(context, edge, opposite vertex)
 * }
 * \endcode
 *
 * \attention It is important that the transform_fn should only make
 * modifications to the edge data, and not the data on either of the vertices.
 *
 * \attention Unlike the transform_neighborhood function, this call actually
 * performs synchronization, so the value of both vertex endpoints are
 * correct.
 *
 * Here is an example which schedules all vertices on out edges.
 *
 * \code
 * void schedule(engine_type::context& context,
 *               graph_type::edge_type edge,
 *               graph_type::vertex_type other) {
 *   context.signal(other);
 * }
 *
 *
 * void update_function(engine_type::context& context,
 *                      graph_type::vertex_type vertex) {
 *   warp::broadcast_neighborhood(context,
 *                                vertex,
 *                                OUT_EDGES,
 *                                schedule);
 * }
 *
 * \endcode
 *
 *
 * An overload is provided which allows you to pass an additional arbitrary
 * argument to the broadcast.
*
 * \param current The vertex to map reduce the neighborhood over
 * \param edge_direction To run over all IN_EDGES, OUT_EDGES or ALL_EDGES
 * \param transform_fn The transform function to run on all the selected edges.
 *
 * \see warp_engine
 * \see warp::parfor_all_vertices
 * \see warp::map_reduce_neighborhood
 * \see warp::transform_neighborhood
 */
template <typename ContextType, typename VertexType>
void broadcast_neighborhood(ContextType& context,
                            VertexType current,
                            edge_dir_type edge_direction,
                            void (*broadcast_fn)(ContextType& context,
                                                 typename VertexType::graph_type::edge_type edge,
                                                 VertexType other)) {
  // dispatch to the basic (no-extra-argument) implementation
  warp_impl::
      broadcast_neighborhood_impl<typename ContextType::engine_type,
                                  typename VertexType::graph_type>::
      basic_broadcast_neighborhood(context, current,
                                   edge_direction, broadcast_fn);
  // the mirrors were refreshed with the master's data during the broadcast,
  // so mark the context as already synchronized
  context.set_synchronized();
}

/**
 * \ingroup warp
 *
 * the broadcast_neighborhood function allows a parallel transformation of the
 * neighborhood of a vertex to be performed and also provides a warp_engine
 * context. This is a blocking operation, and
 * will not return until the distributed computation is complete. When run
 * inside a fiber, to hide latency, the system will automatically context
 * switch to evaluate some other fiber which is ready to run. This function
 * is functionally similar to the transform_neighborhood function, but requires
 * a \ref graphlab::warp_engine "warp_engine"
 * context to be provided. The \ref graphlab::warp_engine::context
 * "warp_engine context" will also be passed on to
 * the transform function.
 *
 * This is the more general overload of the broadcast_neighborhood function
 * which allows an additional arbitrary extra argument to be passed along
 * to the transform function
 *
 * Abstractly, the computation accomplishes the following:
 *
 * \code
 * foreach(edge in neighborhood of current vertex) {
 *   transform_fn(context, edge, opposite vertex, extraarg)
 * }
 * \endcode
 *
 * \attention It is important that the transform_fn should only make
 * modifications to the edge data, and not the data on either of the vertices.
 *
 * \attention Unlike the transform_neighborhood function, this call actually
 * performs synchronization, so the value of both vertex endpoints are
 * correct.
 *
 * Here is an example which schedules all vertices on out edges with a
 * particular value
 * \code
 * void schedule(engine_type::context& context,
 *               graph_type::edge_type edge,
 *               graph_type::vertex_type other,
 *               int value) {
 *   if (edge.data() == value) context.signal(other);
 * }
 *
 *
 * void update_function(engine_type::context& context,
 *                      graph_type::vertex_type vertex) {
 *   // schedule all neighbors connected to an out edge
 *   // with value 10
 *   warp::broadcast_neighborhood(context,
 *                                vertex,
 *                                OUT_EDGES,
 *                                int(10),
 *                                schedule);
 * }
 *
 * \endcode
 *
 * \param current The vertex to map reduce the neighborhood over
 * \param edge_direction To run over all IN_EDGES, OUT_EDGES or ALL_EDGES
 * \param extra An additional argument to be passed to the broadcast
 * \param transform_fn The transform function to run on all the selected edges.
 *
 * \see warp_engine
 * \see warp::parfor_all_vertices()
 * \see warp::map_reduce_neighborhood()
 * \see warp::transform_neighborhood()
 */
template <typename ContextType, typename ExtraArg, typename VertexType>
void broadcast_neighborhood(ContextType& context,
                            VertexType current,
                            edge_dir_type edge_direction,
                            void(*broadcast_fn)(ContextType& context,
                                                typename VertexType::graph_type::edge_type edge,
                                                VertexType other,
                                                const ExtraArg extra),
                            const ExtraArg extra) {
  // dispatch to the extended (extra-argument) implementation
  warp_impl::
      broadcast_neighborhood_impl2<typename ContextType::engine_type,
                                   typename VertexType::graph_type,
                                   ExtraArg>::
      extended_broadcast_neighborhood(context, current,
                                      edge_direction, broadcast_fn, extra);
  // the mirrors were refreshed with the master's data during the broadcast,
  // so mark the context as already synchronized
  context.set_synchronized();
}

} // namespace warp
} // namespace graphlab

#include <graphlab/macros_undef.hpp>
#endif

================================================
FILE: src/graphlab/engine/warp_graph_mapreduce.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */
#ifndef GRAPHLAB_WARP_GRAPH_MAP_REDUCE_HPP
#define GRAPHLAB_WARP_GRAPH_MAP_REDUCE_HPP
#include <boost/bind.hpp>
#include <graphlab/util/generics/conditional_combiner_wrapper.hpp>
#include <graphlab/parallel/fiber_group.hpp>
#include <graphlab/parallel/fiber_control.hpp>
#include <graphlab/parallel/fiber_remote_request.hpp>
#include <graphlab/logger/assertions.hpp>
#include <graphlab/rpc/dc.hpp>
#include <graphlab/macros_def.hpp>
namespace graphlab {
namespace warp {

namespace warp_impl {

/**
 * The default combiner used for combining mapped results from
 * warp::map_reduce_neighborhood(); merges self with other using operator +=.
 */
template <typename T>
void default_combiner(T& self, const T& other) {
  self += other;
}

/**
 * The default combiner used for combining mapped results from
 * warp::map_reduce_neighborhood() which
 * takes an unused argument; merges self with other using operator +=.
 */
template <typename T, typename ExtraArgs>
void extended_default_combiner(T& self, const T& other, const ExtraArgs& unused) {
  self += other;
}

template <typename RetType, typename GraphType>
struct map_reduce_neighborhood_impl {

  typedef typename GraphType::vertex_type vertex_type;
  typedef typename GraphType::edge_type edge_type;
  typedef typename GraphType::local_vertex_type local_vertex_type;
  typedef typename GraphType::local_edge_type local_edge_type;
  typedef typename GraphType::vertex_record vertex_record;

  /**************************************************************************/
  /*                                                                        */
  /*               Basic MapReduce Neighborhood Implementation              */
  /*                                                                        */
  /**************************************************************************/
  /*
   * The master calls basic_mapreduce_neighborhood.
   * Which then issues calls to basic_local_mapper on each machine with a replica.
   */
  /**
   * Runs the mapper over every selected local edge of the vertex identified
   * by global id \c vid and folds the results into a combiner wrapper.
   * Runs on every machine holding a replica of the vertex.
   */
  static conditional_combiner_wrapper<RetType>
  basic_local_mapper(GraphType& graph,
                     edge_dir_type edge_direction,
                     RetType (*mapper)(edge_type edge, vertex_type other),
                     void (*combiner)(RetType&, const RetType&),
                     vertex_id_type vid) {
    lvid_type lvid = graph.local_vid(vid);
    local_vertex_type local_vertex(graph.l_vertex(lvid));
    conditional_combiner_wrapper<RetType> accum(combiner);
    if(edge_direction == IN_EDGES || edge_direction == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.in_edges()) {
        edge_type edge(local_edge);
        vertex_type other(local_edge.source());
        // Lock both endpoints in ascending local-vid order so that two fibers
        // touching the same pair of vertices cannot deadlock.
        // NOTE(review): if a == b (a local self-loop) the same lock is taken
        // twice and unlocked twice — assumes no local self-loops; verify.
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        graph.get_lock_manager()[std::min(a,b)].lock();
        graph.get_lock_manager()[std::max(a,b)].lock();
        accum += mapper(edge, other);
        graph.get_lock_manager()[a].unlock();
        graph.get_lock_manager()[b].unlock();
      }
    }
    // do out edges
    if(edge_direction == OUT_EDGES || edge_direction == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.out_edges()) {
        edge_type edge(local_edge);
        vertex_type other(local_edge.target());
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        graph.get_lock_manager()[std::min(a,b)].lock();
        graph.get_lock_manager()[std::max(a,b)].lock();
        accum += mapper(edge, other);
        graph.get_lock_manager()[a].unlock();
        graph.get_lock_manager()[b].unlock();
      }
    }
    return accum;
  }

  /**
   * RPC entry point invoked on a mirror machine. The mapper and combiner
   * arrive as raw addresses (size_t).
   * NOTE(review): shipping function pointers as integers assumes every
   * process runs an identical binary — TODO confirm this invariant holds
   * for all supported deployments.
   */
  static conditional_combiner_wrapper<RetType>
  basic_local_mapper_from_remote(size_t objid,
                                 edge_dir_type edge_direction,
                                 size_t mapper_ptr,
                                 size_t combiner_ptr,
                                 vertex_id_type vid) {
    // cast the mappers and combiners back into their pointer types
    RetType (*mapper)(edge_type edge, vertex_type other) =
        reinterpret_cast<RetType(*)(edge_type, vertex_type)>(mapper_ptr);
    void (*combiner)(RetType&, const RetType&) =
        reinterpret_cast<void (*)(RetType&, const RetType&)>(combiner_ptr);
    return basic_local_mapper(
        *reinterpret_cast<GraphType*>(distributed_control::get_instance()->get_registered_object(objid)),
        edge_direction, mapper, combiner, vid);
  }

  /**
   * Master-side driver: fans out mapping requests to every mirror, maps the
   * local replica concurrently, then blocks (yielding the fiber) until all
   * remote partial results arrive and are combined.
   */
  static RetType basic_map_reduce_neighborhood(typename GraphType::vertex_type current,
                                               edge_dir_type edge_direction,
                                               RetType (*mapper)(edge_type edge, vertex_type other),
                                               void (*combiner)(RetType& self, const RetType& other)) {
    // get a reference to the graph
    GraphType& graph = current.graph_ref;
    // get the object ID of the graph
    size_t objid = graph.get_rpc_obj_id();
    typename GraphType::vertex_record vrecord =
        graph.l_get_vertex_record(current.local_id());
    // make sure we are running on a master vertex
    ASSERT_EQ(vrecord.owner, distributed_control::get_instance_procid());
    // create num-mirrors worth of requests
    std::vector<request_future<conditional_combiner_wrapper<RetType> > > requests(vrecord.num_mirrors());
    size_t ctr = 0;
    foreach(procid_t proc, vrecord.mirrors()) {
      // issue the communication
      requests[ctr] = fiber_remote_request(proc,
                                           map_reduce_neighborhood_impl<RetType, GraphType>::basic_local_mapper_from_remote,
                                           objid,
                                           edge_direction,
                                           reinterpret_cast<size_t>(mapper),
                                           reinterpret_cast<size_t>(combiner),
                                           current.id());
      ++ctr;
    }
    // compute the local tasks
    conditional_combiner_wrapper<RetType> accum =
        basic_local_mapper(graph, edge_direction, mapper, combiner, current.id());
    accum.set_combiner(combiner);
    // now, wait for everyone
    for (size_t i = 0;i < requests.size(); ++i) {
      accum += requests[i]();
    }
    return accum.value;
  }
};

/**************************************************************************/
/*                                                                        */
/*             Extended MapReduce Neighborhood Implementation             */
/*                                                                        */
/**************************************************************************/
/*
 * The master calls extended_mapreduce_neighborhood.
 * Which then issues calls to extended_local_mapper on each machine with a replica.
 * The extended mapreduce neighborhood allows the mapper and combiner to take
 * an optional argument
 */
template <typename RetType, typename GraphType, typename ExtraArg>
struct map_reduce_neighborhood_impl2 {

  typedef typename GraphType::vertex_type vertex_type;
  typedef typename GraphType::edge_type edge_type;
  typedef typename GraphType::local_vertex_type local_vertex_type;
  typedef typename GraphType::local_edge_type local_edge_type;
  typedef typename GraphType::vertex_record vertex_record;

  /**
   * Runs the mapper (with the extra argument) over every selected local edge
   * of the vertex identified by global id \c vid. Runs on every machine
   * holding a replica of the vertex.
   */
  static conditional_combiner_wrapper<RetType>
  extended_local_mapper(GraphType& graph,
                        edge_dir_type edge_direction,
                        RetType (*mapper)(edge_type edge, vertex_type other, const ExtraArg),
                        void (*combiner)(RetType&, const RetType&, const ExtraArg),
                        vertex_id_type vid,
                        const ExtraArg extra) {
    lvid_type lvid = graph.local_vid(vid);
    local_vertex_type local_vertex(graph.l_vertex(lvid));
    // Bind the extra argument so the wrapper sees a binary combiner.
    conditional_combiner_wrapper<RetType> accum(boost::bind(combiner, _1, _2, extra));
    if(edge_direction == IN_EDGES || edge_direction == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.in_edges()) {
        edge_type edge(local_edge);
        vertex_type other(local_edge.source());
        // Lock both endpoints in ascending local-vid order to avoid deadlock
        // between concurrent fibers touching the same vertex pair.
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        graph.get_lock_manager()[std::min(a,b)].lock();
        graph.get_lock_manager()[std::max(a,b)].lock();
        accum += mapper(edge, other, extra);
        graph.get_lock_manager()[a].unlock();
        graph.get_lock_manager()[b].unlock();
      }
    }
    // do out edges
    if(edge_direction == OUT_EDGES || edge_direction == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.out_edges()) {
        edge_type edge(local_edge);
        vertex_type other(local_edge.target());
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        graph.get_lock_manager()[std::min(a,b)].lock();
        graph.get_lock_manager()[std::max(a,b)].lock();
        accum += mapper(edge, other, extra);
        graph.get_lock_manager()[a].unlock();
        graph.get_lock_manager()[b].unlock();
      }
    }
    return accum;
  }

  /**
   * RPC entry point invoked on a mirror machine; reconstitutes the mapper
   * and combiner from their raw addresses and runs the local mapping pass.
   */
  static conditional_combiner_wrapper<RetType>
  extended_local_mapper_from_remote(size_t objid,
                                    edge_dir_type edge_direction,
                                    size_t mapper_ptr,
                                    size_t combiner_ptr,
                                    vertex_id_type vid,
                                    const ExtraArg extra) {
    // cast the mappers and combiners back into their pointer types
    RetType (*mapper)(edge_type edge, vertex_type other, const ExtraArg) =
        reinterpret_cast<RetType(*)(edge_type, vertex_type, const ExtraArg)>(mapper_ptr);
    void (*combiner)(RetType&, const RetType&, const ExtraArg) =
        reinterpret_cast<void (*)(RetType&, const RetType&, const ExtraArg)>(combiner_ptr);
    return extended_local_mapper(
        *reinterpret_cast<GraphType*>(distributed_control::get_instance()->get_registered_object(objid)),
        edge_direction, mapper, combiner, vid, extra);
  }

  /**
   * Master-side driver: fans out mapping requests (carrying the extra
   * argument) to every mirror, maps the local replica, then blocks the
   * fiber until all remote partial results arrive and are combined.
   */
  static RetType
  extended_map_reduce_neighborhood(typename GraphType::vertex_type current,
                                   edge_dir_type edge_direction,
                                   const ExtraArg extra,
                                   RetType (*mapper)(edge_type edge, vertex_type other, const ExtraArg extra),
                                   void (*combiner)(RetType& self, const RetType& other, const ExtraArg extra)) {
    // get a reference to the graph
    GraphType& graph = current.graph_ref;
    typename GraphType::vertex_record vrecord =
        graph.l_get_vertex_record(current.local_id());
    size_t objid = graph.get_rpc_obj_id();
    // make sure we are running on a master vertex
    ASSERT_EQ(vrecord.owner, distributed_control::get_instance_procid());
    // create num-mirrors worth of requests
    std::vector<request_future<conditional_combiner_wrapper<RetType> > > requests(vrecord.num_mirrors());
    size_t ctr = 0;
    foreach(procid_t proc, vrecord.mirrors()) {
      // issue the communication
      requests[ctr] = fiber_remote_request(proc,
                                           map_reduce_neighborhood_impl2::extended_local_mapper_from_remote,
                                           objid,
                                           edge_direction,
                                           reinterpret_cast<size_t>(mapper),
                                           reinterpret_cast<size_t>(combiner),
                                           current.id(),
                                           extra);
      ++ctr;
    }
    // compute the local tasks
    conditional_combiner_wrapper<RetType> accum =
        extended_local_mapper(graph, edge_direction, mapper, combiner,
                              current.id(), extra);
    // boost::ref is safe here: "extra" outlives the blocking waits below.
    accum.set_combiner(boost::bind(combiner, _1, _2, boost::ref(extra)));
    // now, wait for everyone
    for (size_t i = 0;i < requests.size(); ++i) {
      accum += requests[i]();
    }
    return accum.value;
  }
};

} // namespace warp::warp_impl

/**
 * \ingroup warp
 *
 * This Warp function allows a map-reduce aggregation of the neighborhood of a
 * vertex to be performed. This is a blocking operation, and will not return
 * until the distributed computation is complete. When run inside a fiber, to
 * hide latency, the system will automatically context switch to evaluate some
 * other fiber which is ready to run.
 *
 * Abstractly, the computation accomplishes the following:
 *
 * \code
 * ResultType result()
 * foreach(edge in neighborhood of current vertex) {
 *   combiner(result, mapper(edge, opposite vertex))
 * }
 * return result
 * \endcode
 *
 * \attention This call does not accomplish synchronization, thus
 * modifications to the current vertex will not be reflected during
 * the call. In other words, inside the mapper function, only the values on
 * edge.data() and other.data() will be valid. The value of the vertex
 * on the "self" end of the edge will not reflect changes you made to the vertex
 * immediately before calling warp::map_reduce_neighborhood(). Use the overload
 * of map_reduce_neighborhood (below) if you want to pass on additional
 * information to the mapper.
 *
 * Here is an example which implements the PageRank computation, using
 * the parfor_all_vertices function to create a parallel for loop using fibers.
 * \code
 * float pagerank_map(graph_type::edge_type edge, graph_type::vertex_type other) {
 *   return other.data() / other.num_out_edges();
 * }
 *
 * // the function arguments of the combiner must match the return type of the
 * // map function.
 * void pagerank_combine(float& a, const float& b) {
 *   a += b;
 * }
 *
 * void pagerank(graph_type::vertex_type vertex) {
 *   // computes an aggregate over the neighborhood using map_reduce_neighborhood
 *   // The pagerank_map function will be executed over every in-edge of the graph,
 *   // and the result of each map is combined using the pagerank_combine
 *   // function. The pagerank_combine is not strictly necessary here since the
 *   // default combine behavior is to use += anyway.
 *   vertex.data() = 0.15 + 0.85 * warp::map_reduce_neighborhood(vertex,
 *                                                               IN_EDGES,
 *                                                               pagerank_map,
 *                                                               pagerank_combine);
 * }
 *
 * ...
 * // runs the pagerank function on all the vertices in the graph.
 * parfor_all_vertices(graph, pagerank);
 * \endcode
 *
 * An overload is provided which allows you to pass an additional arbitrary
 * argument to the mappers and combiners.
 *
 *
 * \param current The vertex to map reduce the neighborhood over
 * \param edge_direction To run over all IN_EDGES, OUT_EDGES or ALL_EDGES
 * \param mapper The map function that will be executed. Must be a function pointer.
 * \param combiner The combine function that will be executed. Must be a function pointer.
 *                 Optional. Defaults to using "+=" on the output of the mapper
 *
 * \return The result of the neighborhood map reduce operation. The return
 * type matches the return type of the mapper.
 *
 * \see warp_engine
 * \see warp::parfor_all_vertices()
 * \see warp::transform_neighborhood()
 * \see warp::broadcast_neighborhood()
 */
template <typename RetType, typename VertexType>
RetType map_reduce_neighborhood(VertexType current,
                                edge_dir_type edge_direction,
                                RetType (*mapper)(typename VertexType::graph_type::edge_type edge,
                                                  VertexType other),
                                void (*combiner)(RetType& self,
                                                 const RetType& other) = warp_impl::default_combiner<RetType>) {
  // Thin forwarder to the basic (no-extra-argument) implementation.
  return warp_impl::
      map_reduce_neighborhood_impl<RetType, typename VertexType::graph_type>::
      basic_map_reduce_neighborhood(current, edge_direction, mapper, combiner);
}

/**
 * \ingroup warp
 *
 * This Warp function allows a map-reduce aggregation of the neighborhood of a
 * vertex to be performed. This is a blocking operation, and will not return
 * until the distributed computation is complete. When run inside a fiber, to
 * hide latency, the system will automatically context switch to evaluate some
 * other fiber which is ready to run.
 *
 * This is the more general overload of the map_reduce_neighborhood function
 * which allows an additional arbitrary extra argument to be passed along
 * to the mapper and combiner functions.
 *
 * Abstractly, the computation accomplishes the following:
 *
 * \code
 * ResultType result()
 * foreach(edge in neighborhood of current vertex) {
 *   combiner(result, mapper(edge, opposite vertex, extraarg), extraarg)
 * }
 * return result
 * \endcode
 *
 * Here is an example which implements the PageRank computation, using
 * the parfor_all_vertices function to create a parallel for loop using fibers.
 * We demonstrate the additional argument by passing on the 0.85 scaling value to
 * be computed in the map.
 * \code
 * float pagerank_map(graph_type::edge_type edge,
 *                    graph_type::vertex_type other, const float scale) {
 *   // the scale value here will match the last argument passed into
 *   // the map_reduce_neighborhood call. In this case, it is fixed to
 *   // 0.85.
 *   return scale * other.data() / other.num_out_edges();
 * }
 *
 * // the function arguments of the combiner must match the return type of the
 * // map function.
 * void pagerank_combine(float& a, const float& b, const float _scale_unused) {
 *   a += b;
 * }
 *
 * void pagerank(graph_type::vertex_type vertex) {
 *   // computes an aggregate over the neighborhood using map_reduce_neighborhood
 *   // The pagerank_map function will be executed over every in-edge of the graph,
 *   // and the result of each map is combined using the pagerank_combine
 *   // function. The pagerank_combine is not strictly necessary here since the
 *   // default combine behavior is to use += anyway.
 *   vertex.data() = 0.15 + warp::map_reduce_neighborhood(vertex,
 *                                                        IN_EDGES,
 *                                                        float(0.85),
 *                                                        pagerank_map,
 *                                                        pagerank_combine);
 * }
 *
 * ...
 * // runs the pagerank function on all the vertices in the graph.
 * parfor_all_vertices(graph, pagerank);
 * \endcode
 *
 *
 * \param current The vertex to map reduce the neighborhood over
 * \param edge_direction To run over all IN_EDGES, OUT_EDGES or ALL_EDGES
 * \param extra An additional argument to be passed to the mapper and combiner
 * functions.
 * \param mapper The map function that will be executed. Must be a
 * function pointer.
 * \param combiner The combine function that will be executed. Must be a
 * function pointer. Optional. Defaults to using "+=" on the output of the
 * mapper
 *
 * \return The result of the neighborhood map reduce operation. The
 * return type matches the return type of the mapper.
 *
 * \see warp_engine
 * \see warp::parfor_all_vertices()
 * \see warp::transform_neighborhood()
 * \see warp::broadcast_neighborhood()
 */
template <typename RetType, typename ExtraArg, typename VertexType>
RetType map_reduce_neighborhood(VertexType current,
                                edge_dir_type edge_direction,
                                const ExtraArg extra,
                                RetType (*mapper)(typename VertexType::graph_type::edge_type edge,
                                                  VertexType other,
                                                  const ExtraArg extra),
                                void (*combiner)(RetType& self,
                                                 const RetType& other,
                                                 const ExtraArg extra) = warp_impl::extended_default_combiner<RetType, ExtraArg>) {
  // Thin forwarder to the extended (extra-argument) implementation.
  return warp_impl::
      map_reduce_neighborhood_impl2<RetType, typename VertexType::graph_type, ExtraArg>::
      extended_map_reduce_neighborhood(current, edge_direction, extra, mapper, combiner);
}

} // namespace warp

} // namespace graphlab
#include <graphlab/macros_undef.hpp>
#endif


================================================
FILE: src/graphlab/engine/warp_graph_transform.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */
#ifndef GRAPHLAB_WARP_GRAPH_TRANSFORM_HPP
#define GRAPHLAB_WARP_GRAPH_TRANSFORM_HPP
#include <boost/bind.hpp>
#include <graphlab/util/generics/conditional_combiner_wrapper.hpp>
#include <graphlab/parallel/fiber_group.hpp>
#include <graphlab/parallel/fiber_control.hpp>
#include <graphlab/parallel/fiber_remote_request.hpp>
#include <graphlab/logger/assertions.hpp>
#include <graphlab/rpc/dc.hpp>
#include <graphlab/macros_def.hpp>
namespace graphlab {
namespace warp {

namespace warp_impl {

template <typename GraphType>
struct transform_neighborhood_impl {

  typedef typename GraphType::vertex_type vertex_type;
  typedef typename GraphType::edge_type edge_type;
  typedef typename GraphType::local_vertex_type local_vertex_type;
  typedef typename GraphType::local_edge_type local_edge_type;
  typedef typename GraphType::vertex_record vertex_record;

  /**************************************************************************/
  /*                                                                        */
  /*               Basic MapReduce Neighborhood Implementation              */
  /*                                                                        */
  /**************************************************************************/
  /*
   * The master calls basic_mapreduce_neighborhood.
   * Which then issues calls to basic_local_mapper on each machine with a replica.
   */
  /**
   * Applies the transform function to every selected local edge of the
   * vertex identified by global id \c vid. Runs on every machine holding a
   * replica of the vertex.
   */
  static void basic_local_transform_neighborhood(GraphType& graph,
                                                 edge_dir_type edge_direction,
                                                 void(*transform_fn)(edge_type edge, vertex_type other),
                                                 vertex_id_type vid) {
    lvid_type lvid = graph.local_vid(vid);
    local_vertex_type local_vertex(graph.l_vertex(lvid));
    if(edge_direction == IN_EDGES || edge_direction == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.in_edges()) {
        edge_type edge(local_edge);
        vertex_type other(local_edge.source());
        // Lock both endpoints in ascending local-vid order to avoid deadlock
        // between concurrent fibers touching the same vertex pair.
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        graph.get_lock_manager()[std::min(a,b)].lock();
        graph.get_lock_manager()[std::max(a,b)].lock();
        transform_fn(edge, other);
        graph.get_lock_manager()[a].unlock();
        graph.get_lock_manager()[b].unlock();
      }
    }
    // do out edges
    if(edge_direction == OUT_EDGES || edge_direction == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.out_edges()) {
        edge_type edge(local_edge);
        vertex_type other(local_edge.target());
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        graph.get_lock_manager()[std::min(a,b)].lock();
        graph.get_lock_manager()[std::max(a,b)].lock();
        transform_fn(edge, other);
        graph.get_lock_manager()[a].unlock();
        graph.get_lock_manager()[b].unlock();
      }
    }
  }

  /**
   * RPC entry point invoked on a mirror machine; reconstitutes the transform
   * function from its raw address and runs the local transform pass.
   */
  static void basic_local_transform_neighborhood_from_remote(size_t objid,
                                                             edge_dir_type edge_direction,
                                                             size_t transform_ptr,
                                                             vertex_id_type vid) {
    // cast the mappers and combiners back into their pointer types
    void(*transform_fn)(edge_type edge, vertex_type other) =
        reinterpret_cast<void(*)(edge_type, vertex_type)>(transform_ptr);
    basic_local_transform_neighborhood(
        *reinterpret_cast<GraphType*>(distributed_control::get_instance()->get_registered_object(objid)),
        edge_direction, transform_fn, vid);
  }

  /**
   * Master-side driver: fans out the transform to every mirror, transforms
   * the local replica, then blocks the fiber until all mirrors are done.
   */
  static void basic_transform_neighborhood(typename GraphType::vertex_type current,
                                           edge_dir_type edge_direction,
                                           void(*transform_fn)(edge_type edge, vertex_type other)) {
    // get a reference to the graph
    GraphType& graph = current.graph_ref;
    // get the object ID of the graph
    size_t objid = graph.get_rpc_obj_id();
    typename GraphType::vertex_record vrecord =
        graph.l_get_vertex_record(current.local_id());
    // make sure we are running on a master vertex
    ASSERT_EQ(vrecord.owner, distributed_control::get_instance_procid());
    // create num-mirrors worth of requests
    std::vector<request_future<void > > requests(vrecord.num_mirrors());
    size_t ctr = 0;
    foreach(procid_t proc, vrecord.mirrors()) {
      // issue the communication
      requests[ctr] = fiber_remote_request(proc,
                                           transform_neighborhood_impl<GraphType>::basic_local_transform_neighborhood_from_remote,
                                           objid,
                                           edge_direction,
                                           reinterpret_cast<size_t>(transform_fn),
                                           current.id());
      ++ctr;
    }
    // compute the local tasks
    basic_local_transform_neighborhood(graph, edge_direction, transform_fn, current.id());
    // now, wait for everyone
    for (size_t i = 0;i < requests.size(); ++i) {
      requests[i]();
    }
  }
};

template <typename GraphType, typename ExtraArg>
struct transform_neighborhood_impl2 {

  typedef typename GraphType::vertex_type vertex_type;
  typedef typename GraphType::edge_type edge_type;
  typedef typename GraphType::local_vertex_type local_vertex_type;
  typedef typename GraphType::local_edge_type local_edge_type;
  typedef typename GraphType::vertex_record vertex_record;

  /**************************************************************************/
  /*                                                                        */
  /*             Extended MapReduce Neighborhood Implementation             */
  /*                                                                        */
  /**************************************************************************/
  /*
   * The master calls extended_mapreduce_neighborhood.
   * Which then issues calls to extended_local_mapper on each machine with a replica.
   * The extended mapreduce neighborhood allows the mapper and combiner to take
   * an optional argument
   */
  /**
   * Applies the transform function (with the extra argument) to every
   * selected local edge of the vertex identified by global id \c vid.
   */
  static void extended_local_transform_neighborhood(GraphType& graph,
                                                    edge_dir_type edge_direction,
                                                    void(*transform_fn)(edge_type edge, vertex_type other, const ExtraArg extra),
                                                    vertex_id_type vid,
                                                    const ExtraArg extra) {
    lvid_type lvid = graph.local_vid(vid);
    local_vertex_type local_vertex(graph.l_vertex(lvid));
    if(edge_direction == IN_EDGES || edge_direction == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.in_edges()) {
        edge_type edge(local_edge);
        vertex_type other(local_edge.source());
        // Lock both endpoints in ascending local-vid order (deadlock avoidance).
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        graph.get_lock_manager()[std::min(a,b)].lock();
        graph.get_lock_manager()[std::max(a,b)].lock();
        transform_fn(edge, other, extra);
        graph.get_lock_manager()[a].unlock();
        graph.get_lock_manager()[b].unlock();
      }
    }
    // do out edges
    if(edge_direction == OUT_EDGES || edge_direction == ALL_EDGES) {
      foreach(local_edge_type local_edge, local_vertex.out_edges()) {
        edge_type edge(local_edge);
        vertex_type other(local_edge.target());
        lvid_type a = edge.source().local_id(), b = edge.target().local_id();
        graph.get_lock_manager()[std::min(a,b)].lock();
        graph.get_lock_manager()[std::max(a,b)].lock();
        transform_fn(edge, other, extra);
        graph.get_lock_manager()[a].unlock();
        graph.get_lock_manager()[b].unlock();
      }
    }
  }

  /**
   * RPC entry point invoked on a mirror machine for the extended variant.
   */
  static void extended_local_transform_neighborhood_from_remote(size_t objid,
                                                                edge_dir_type edge_direction,
                                                                size_t transform_ptr,
                                                                vertex_id_type vid,
                                                                const ExtraArg extra) {
    // cast the mappers and combiners back into their pointer types
    void(*transform_fn)(edge_type edge, vertex_type other, const ExtraArg) =
        reinterpret_cast<void(*)(edge_type, vertex_type, const ExtraArg)>(transform_ptr);
    extended_local_transform_neighborhood(
        *reinterpret_cast<GraphType*>(distributed_control::get_instance()->get_registered_object(objid)),
        edge_direction, transform_fn, vid, extra);
  }

  /**
   * Master-side driver for the extended variant: fans out the transform
   * (carrying the extra argument) to every mirror, transforms the local
   * replica, then blocks the fiber until all mirrors are done.
   */
  static void
  extended_transform_neighborhood(typename GraphType::vertex_type current,
                                  edge_dir_type edge_direction,
                                  void(*transform_fn)(edge_type edge, vertex_type other, const ExtraArg extra),
                                  const ExtraArg extra) {
    // get a reference to the graph
    GraphType& graph = current.graph_ref;
    // get the object ID of the graph
    size_t objid = graph.get_rpc_obj_id();
    typename GraphType::vertex_record vrecord =
        graph.l_get_vertex_record(current.local_id());
    // make sure we are running on a master vertex
    ASSERT_EQ(vrecord.owner, distributed_control::get_instance_procid());
    // create num-mirrors worth of requests
    std::vector<request_future<void> > requests(vrecord.num_mirrors());
    size_t ctr = 0;
    foreach(procid_t proc, vrecord.mirrors()) {
      // issue the communication
      requests[ctr] = fiber_remote_request(proc,
                                           transform_neighborhood_impl2<GraphType, ExtraArg>::extended_local_transform_neighborhood_from_remote,
                                           objid,
                                           edge_direction,
                                           reinterpret_cast<size_t>(transform_fn),
                                           current.id(),
                                           extra);
      ++ctr;
    }
    // compute the local tasks
    extended_local_transform_neighborhood(graph, edge_direction, transform_fn, current.id(), extra);
    // now, wait for everyone
    for (size_t i = 0;i < requests.size(); ++i) {
      requests[i]();
    }
  }
};

} // namespace warp::warp_impl

/**
 * \ingroup warp
 *
 * The transform_neighborhood() function allows a parallel transformation of the
 * neighborhood of a vertex to be performed. This is a blocking operation, and
 * will not return until the distributed computation is complete. When run
 * inside a fiber, to hide latency, the system will automatically context
 * switch to evaluate some other fiber which is ready to run.
 *
 * Abstractly, the computation accomplishes the following:
 *
 * \code
 * foreach(edge in neighborhood of current vertex) {
 *   transform_fn(edge, opposite vertex)
 * }
 * \endcode
 *
 * \attention It is important that the transform_fn should only make modifications to the
 * edge data, and not the data on the other vertex.
 *
 * \attention This call does not accomplish synchronization, thus
 * modifications to the current vertex will not be reflected during
 * the call. In other words, inside the mapper function, only the values on
 * edge.data() and other.data() will be valid. The value of the vertex
 * on the "self" end of the edge will not reflect changes you made to the vertex
 * immediately before calling transform_neighborhood(). Use the overload of
 * transform_neighborhood (below) if you want to pass on additional information to the
 * mapper.
 *
 * Here is an example which clears all the in edge values of some set of vertices.
 * \code
 * void clear_value(graph_type::edge_type edge, graph_type::vertex_type other) {
 *   edge.data() = 0;
 * }
 *
 *
 * void clear_in_edges(graph_type::vertex_type vertex) {
 *   warp::transform_neighborhood(vertex,
 *                                IN_EDGES,
 *                                clear_value);
 * }
 *
 * ...
 * warp::parfor_all_vertices(graph, clear_in_edges, some_vset);
 * \endcode
 *
 *
 * An overload is provided which allows you to pass an additional arbitrary
 * argument to the transform
 *
 * \param current The vertex to map reduce the neighborhood over
 * \param edge_direction To run over all IN_EDGES, OUT_EDGES or ALL_EDGES
 * \param transform_fn The transform function to run on all the selected edges.
 *
 * \see warp_engine
 * \see warp::parfor_all_vertices()
 * \see warp::map_reduce_neighborhood()
 * \see warp::broadcast_neighborhood()
 */
template <typename VertexType>
void transform_neighborhood(VertexType current,
                            edge_dir_type edge_direction,
                            void (*transform_fn)(typename VertexType::graph_type::edge_type edge,
                                                 VertexType other)) {
  // Thin forwarder to the basic (no-extra-argument) implementation.
  warp_impl::
      transform_neighborhood_impl<typename VertexType::graph_type>::
      basic_transform_neighborhood(current, edge_direction, transform_fn);
}

/**
 * \ingroup warp
 *
 * The transform_neighborhood function allows a parallel transformation of the
 * neighborhood of a vertex to be performed. This is a blocking operation, and
 * will not return until the distributed computation is complete. When run
 * inside a fiber, to hide latency, the system will automatically context
 * switch to evaluate some other fiber which is ready to run.
 *
 * This is the more general overload of the
 * warp::transform_neighborhood() function
 * which allows an additional arbitrary extra argument to be passed along
 * to the mapper and combiner functions.
 *
 * Abstractly, the computation accomplishes the following:
 *
 * \code
 * foreach(edge in neighborhood of current vertex) {
 *   transform_fn(edge, extraarg, opposite vertex)
 * }
 * \endcode
 *
 * \attention It is important that the transform_fn should only make modifications to the
 * edge data, and not the data on the other vertex.
 *
 * \attention This call does not accomplish synchronization, thus
 * modifications to the current vertex will not be reflected during
 * the call. In other words, inside the mapper function, only the values on
 * edge.data() and other.data() will be valid. The value of the vertex
 * on the "self" end of the edge will not reflect changes you made to the vertex
 * immediately before calling warp::transform_neighborhood() . Use the overload of
 * warp::transform_neighborhood() if you want to pass on additional information to the
 * mapper.
 *
 * Here is an example which set all the in edge values of some set of vertices
 * to the source vertex's value.
 * \code
 * void set_value(graph_type::edge_type edge, graph_type::vertex_type other,
 *                int value) {
 *   edge.data() = value;
 * }
 *
 *
 * void set_in_edges(graph_type::vertex_type vertex) {
 *   warp::transform_neighborhood(vertex,
 *                                IN_EDGES,
 *                                set_value,
 *                                vertex.data()); // say this is an integer
 * }
 *
 * ...
 * warp::parfor_all_vertices(graph, set_in_edges, some_vset);
 * \endcode
 *
 *
 * \param current The vertex to map reduce the neighborhood over
 * \param edge_direction To run over all IN_EDGES, OUT_EDGES or ALL_EDGES
 * \param extra An additional argument to be passed to the mapper and combiner functions.
 * \param transform_fn The transform function to run on all the selected edges.
 *
 * \see warp_engine
 * \see warp::parfor_all_vertices()
 * \see warp::map_reduce_neighborhood()
 * \see warp::broadcast_neighborhood()
 */
template <typename ExtraArg, typename VertexType>
void transform_neighborhood(VertexType current,
                            edge_dir_type edge_direction,
                            void(*transform_fn)(typename VertexType::graph_type::edge_type edge,
                                                VertexType other,
                                                const ExtraArg extra),
                            const ExtraArg extra) {
  // Thin forwarder to the extended (extra-argument) implementation.
  warp_impl::
      transform_neighborhood_impl2<typename VertexType::graph_type, ExtraArg>::
      extended_transform_neighborhood(current, edge_direction, transform_fn, extra);
}

} // namespace warp

} // namespace graphlab
#include <graphlab/macros_undef.hpp>
#endif


================================================
FILE: src/graphlab/engine/warp_parfor_all_vertices.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */
#ifndef GRAPHLAB_WARP_PARFOR_ALL_VERTICES_HPP
#define GRAPHLAB_WARP_PARFOR_ALL_VERTICES_HPP
#include <boost/function.hpp>
#include <graphlab/parallel/fiber_group.hpp>
#include <graphlab/parallel/atomic.hpp>
#include <graphlab/graph/vertex_set.hpp>
#include <graphlab/rpc/dc.hpp>
namespace graphlab {
namespace warp {

namespace warp_impl {

/*
 * Actual Parfor implementation.
 * Holds a reference to all the arguments.
* Each fiber increments the atomic counter and runs the fn on it) */ template <typename GraphType> struct parfor_all_vertices_impl{ GraphType& graph; boost::function<void(typename GraphType::vertex_type)> fn; vertex_set& vset; atomic<size_t> ctr; parfor_all_vertices_impl(GraphType& graph, boost::function<void(typename GraphType::vertex_type)> fn, vertex_set& vset): graph(graph),fn(fn),vset(vset),ctr(0) { } void run_fiber() { while (1) { size_t lvid = ctr.inc_ret_last(); if (lvid >= graph.num_local_vertices() || !vset.l_contains(lvid)) break; typename GraphType::local_vertex_type l_vertex = graph.l_vertex(lvid); if (l_vertex.owned()) { typename GraphType::vertex_type vertex(l_vertex); fn(vertex); } } } }; } // namespace warp_impl /** * \ingroup warp * * This Warp function provides a simple parallel for loop over all vertices * in the graph, or in a given set of vertices. A large number of light-weight * threads called fibers are used to run the user specified function, * allowing the user to make what normally will be blocking calls, on * the neighborhood of each vertex. * * \code * float pagerank_map(graph_type::edge_type edge, graph_type::vertex_type other) { * return other.data() / other.num_out_edges(); * } * * void pagerank(graph_type::vertex_type vertex) { * // computes an aggregate over the neighborhood using map_reduce_neighborhood * vertex.data() = 0.15 + 0.85 * warp::map_reduce_neighborhood(vertex, * IN_EDGES, * pagerank_map); * } * * ... * // runs the pagerank function on all the vertices in the graph. * parfor_all_vertices(graph, pagerank); * \endcode * * \param graph A reference to the graph object * \param fn A function to run on each vertex. Has the prototype void(GraphType::vertex_type). Can be a boost::function * \param vset A set of vertices to run on * \param nfibers Number of fiber threads to use. Defaults to 10000 * \param stacksize Size of each fiber stack in bytes. 
Defaults to 16384 bytes * * \see graphlab::warp::map_reduce_neighborhood() * \see graphlab::warp::warp_graph_transform() */ template <typename GraphType, typename FunctionType> void parfor_all_vertices(GraphType& graph, FunctionType fn, vertex_set vset = GraphType::complete_set(), size_t nfibers = 10000, size_t stacksize = 16384) { distributed_control::get_instance()->barrier(); bool old_fast_track = distributed_control::get_instance()->set_fast_track_requests(false); fiber_group group; group.set_stacksize(stacksize); warp_impl::parfor_all_vertices_impl<GraphType> parfor(graph, fn, vset); for (size_t i = 0;i < nfibers; ++i) { group.launch(boost::bind(&warp_impl::parfor_all_vertices_impl<GraphType>::run_fiber, &parfor)); } group.join(); distributed_control::get_instance()->barrier(); distributed_control::get_instance()->set_fast_track_requests(old_fast_track); graph.synchronize(vset); } } // namespace warp } // namespace graphlab #endif ================================================ FILE: src/graphlab/graph/CMakeLists.txt ================================================ project(GraphLab) ================================================ FILE: src/graphlab/graph/builtin_parsers.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_GRAPH_BUILTIN_PARSERS_HPP #define GRAPHLAB_GRAPH_BUILTIN_PARSERS_HPP #include <string> #include <sstream> #include <iostream> #if defined(__cplusplus) && __cplusplus >= 201103L // do not include spirit #else #include <boost/config/warning_disable.hpp> #include <boost/spirit/include/qi.hpp> #include <boost/spirit/include/phoenix_core.hpp> #include <boost/spirit/include/phoenix_operator.hpp> #include <boost/spirit/include/phoenix_stl.hpp> #endif #include <graphlab/util/stl_util.hpp> #include <graphlab/logger/logger.hpp> #include <graphlab/serialization/serialization_includes.hpp> namespace graphlab { namespace builtin_parsers { /** * \brief Parse files in the Stanford Network Analysis Package format. * * example: * * # some comment * # another comment * 1 2 * 3 4 * 1 4 * */ template <typename Graph> bool snap_parser(Graph& graph, const std::string& srcfilename, const std::string& str) { if (str.empty()) return true; else if (str[0] == '#') { std::cout << str << std::endl; } else { size_t source, target; char* targetptr; source = strtoul(str.c_str(), &targetptr, 10); if (targetptr == NULL) return false; target = strtoul(targetptr, NULL, 10); if(source != target) graph.add_edge(source, target); } return true; } // end of snap parser /** * \brief Parse files in the standard tsv format * * This is identical to the SNAP format but does not allow comments. 
* */ template <typename Graph> bool tsv_parser(Graph& graph, const std::string& srcfilename, const std::string& str) { if (str.empty()) return true; size_t source, target; char* targetptr; source = strtoul(str.c_str(), &targetptr, 10); if (targetptr == NULL) return false; target = strtoul(targetptr, NULL, 10); if(source != target) graph.add_edge(source, target); return true; } // end of tsv parser template <typename Graph> bool csv_parser(Graph& graph, const std::string& filename, const std::string& textline) { if (textline.empty()) return true; size_t split = textline.find_first_of(","); if (split == std::string::npos) return true; else { std::string t = textline; t[split] = 0; graph.add_edge(strtoul(t.c_str(), NULL, 10), strtoul(t.c_str() + split + 1, NULL, 10)); return true; } } #if defined(__cplusplus) && __cplusplus >= 201103L // The spirit parser seems to have issues when compiling under // C++11. Temporary workaround with a hard coded parser. TOFIX template <typename Graph> bool adj_parser(Graph& graph, const std::string& srcfilename, const std::string& line) { // If the line is empty simply skip it if(line.empty()) return true; std::stringstream strm(line); vertex_id_type source; size_t n; strm >> source; if (strm.fail()) return false; strm >> n; if (strm.fail()) return true; size_t nadded = 0; while (strm.good()) { vertex_id_type target; strm >> target; if (strm.fail()) break; if (source != target) graph.add_edge(source, target); ++nadded; } if (n != nadded) return false; return true; } // end of adj parser #else template <typename Graph> bool adj_parser(Graph& graph, const std::string& srcfilename, const std::string& line) { // If the line is empty simply skip it if(line.empty()) return true; // We use the boost spirit parser which requires (too) many separate // namespaces so to make things clear we shorten them here. 
      namespace qi = boost::spirit::qi;
      namespace ascii = boost::spirit::ascii;
      namespace phoenix = boost::phoenix;
      // Parsed results: the source vertex, the declared number of targets,
      // and the target list itself.
      vertex_id_type source(-1);
      vertex_id_type ntargets(-1);
      std::vector<vertex_id_type> targets;
      // Grammar: <source> [,] <ntargets> [,] <target> [, <target> ...]
      // Commas are optional separators; whitespace is skipped by the
      // ascii::space skipper.
      const bool success = qi::phrase_parse
        (line.begin(), line.end(),
         // Begin grammar
         (
          qi::ulong_[phoenix::ref(source) = qi::_1] >> -qi::char_(",") >>
          qi::ulong_[phoenix::ref(ntargets) = qi::_1] >> -qi::char_(",") >>
          *(qi::ulong_[phoenix::push_back(phoenix::ref(targets), qi::_1)] % -qi::char_(","))
          )
         ,
         // End grammar
         ascii::space);
      // Test to see if the boost parser was able to parse the line, and
      // that the declared target count matches what was actually read.
      if(!success || ntargets != targets.size()) {
        logstream(LOG_ERROR) << "Parse error in vertex prior parser."
                             << std::endl;
        return false;
      }
      // Self edges are dropped silently.
      for(size_t i = 0; i < targets.size(); ++i) {
        if(source != targets[i]) graph.add_edge(source, targets[i]);
      }
      return true;
    } // end of adj parser
#endif

    // Writes the graph as tab-separated "source<TAB>target" edge lines;
    // vertices produce no output of their own.
    template <typename Graph>
    struct tsv_writer{
      typedef typename Graph::vertex_type vertex_type;
      typedef typename Graph::edge_type edge_type;
      std::string save_vertex(vertex_type) { return ""; }
      std::string save_edge(edge_type e) {
        return tostr(e.source().id()) + "\t" + tostr(e.target().id()) + "\n";
      }
    };

    // Writer for the "graphjrl" journal format: each vertex or edge is
    // serialized to a binary archive and newline-escaped so that every
    // record occupies exactly one line of the output file.
    template <typename Graph>
    struct graphjrl_writer{
      typedef typename Graph::vertex_type vertex_type;
      typedef typename Graph::edge_type edge_type;

      /**
       * \internal
       * Replaces \255 with \255\1
       * Replaces \\n with \255\0
       */
      static std::string escape_newline(charstream& strm) {
        size_t ctr = 0;
        char *ptr = strm->c_str();
        size_t strmlen = strm->size();
        // First pass: count the characters that need escaping so the
        // output string can be sized exactly once.
        for (size_t i = 0;i < strmlen; ++i) {
          ctr += (ptr[i] == (char)255 || ptr[i] == '\n');
        }
        std::string ret(ctr + strmlen, 0);
        size_t target = 0;
        // Second pass: copy, expanding each escaped character to 2 bytes.
        for (size_t i = 0;i < strmlen; ++i, ++ptr) {
          if ((*ptr) == (char)255) {
            ret[target] = (char)255;
            ret[target + 1] = 1;
            target += 2;
          }
          else if ((*ptr) == '\n') {
            ret[target] = (char)255;
            ret[target + 1] = 0;
            target += 2;
          }
          else {
            ret[target] = (*ptr);
            ++target;
          }
        }
        assert(target == ctr + strmlen);
        return ret;
      }

      /**
       *
       * \internal
       * Replaces \255\1 with \255
       * Replaces \255\0 with \\n
       */
      static std::string unescape_newline(const std::string& str) {
        size_t ctr = 0;
        // count the number of escapes
        for (size_t i = 0;i < str.length(); ++i) {
          ctr += (str[i] == (char)255);
        }
        // real length is string length - escapes
        std::string ret(str.size() - ctr, 0);
        // Maps the escape payload byte back to the original character:
        // payload 0 -> '\n', payload 1 -> \255.
        const char escapemap[2] = {'\n', (char)255};
        size_t target = 0;
        for (size_t i = 0;i < str.length(); ++i, ++target) {
          if (str[i] == (char)255) {
            // escape character: consume the payload byte that follows
            ++i;
            ASSERT_MSG(str[i] == 0 || str[i] == 1,
                       "Malformed escape sequence in graphjrl file.");
            ret[target] = escapemap[(int)str[i]];
          }
          else {
            ret[target] = str[i];
          }
        }
        return ret;
      }

      // Serializes a vertex as: byte 0, vid, vdata -- newline-escaped and
      // terminated with '\n' so the record occupies one line.
      std::string save_vertex(vertex_type v) {
        charstream strm(128);
        oarchive oarc(strm);
        oarc << char(0) << v.id() << v.data();
        strm.flush();
        return escape_newline(strm) + "\n";
      }

      // Serializes an edge as: byte 1, source vid, target vid, edata.
      std::string save_edge(edge_type e) {
        charstream strm(128);
        oarchive oarc(strm);
        oarc << char(1) << e.source().id() << e.target().id() << e.data();
        strm.flush();
        return escape_newline(strm) + "\n";
      }
    };

    // Parses one line previously produced by graphjrl_writer: undoes the
    // newline escaping, then deserializes either a vertex record (leading
    // byte 0) or an edge record (leading byte 1). Any other leading byte
    // is a parse failure.
    template <typename Graph>
    bool graphjrl_parser(Graph& graph,
                         const std::string& srcfilename,
                         const std::string& str) {
      std::string unescapedstr = graphjrl_writer<Graph>::unescape_newline(str);
      boost::iostreams::stream<boost::iostreams::array_source>
          istrm(unescapedstr.c_str(), unescapedstr.length());
      iarchive iarc(istrm);
      char entrytype;
      iarc >> entrytype;
      if (entrytype == 0) {
        typename Graph::vertex_id_type vid;
        typename Graph::vertex_data_type vdata;
        iarc >> vid >> vdata;
        graph.add_vertex(vid, vdata);
      }
      else if (entrytype == 1) {
        typename Graph::vertex_id_type srcvid, destvid;
        typename Graph::edge_data_type edata;
        iarc >> srcvid >> destvid >> edata;
        graph.add_edge(srcvid, destvid, edata);
      }
      else {
        return false;
      }
      return true;
    }

  } // namespace builtin_parsers
} // namespace graphlab
#endif


================================================
FILE: src/graphlab/graph/distributed_graph.hpp
================================================
/** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_DISTRIBUTED_GRAPH_HPP #define GRAPHLAB_DISTRIBUTED_GRAPH_HPP #ifndef __NO_OPENMP__ #include <omp.h> #endif #include <cmath> #include <string> #include <list> #include <vector> #include <set> #include <map> #include <graphlab/util/dense_bitset.hpp> #include <queue> #include <algorithm> #include <functional> #include <fstream> #include <iostream> #include <sstream> #include <boost/functional.hpp> #include <boost/algorithm/string/predicate.hpp> #include <boost/iostreams/stream.hpp> #include <boost/iostreams/filtering_streambuf.hpp> #include <boost/iostreams/filtering_stream.hpp> #include <boost/iostreams/copy.hpp> #include <boost/iostreams/filter/gzip.hpp> #include <boost/filesystem.hpp> #include <boost/concept/requires.hpp> #include <graphlab/logger/logger.hpp> #include <graphlab/logger/assertions.hpp> #include <graphlab/rpc/dc.hpp> #include <graphlab/rpc/dc_dist_object.hpp> #include <graphlab/rpc/buffered_exchange.hpp> #include <graphlab/util/random.hpp> #include <graphlab/util/branch_hints.hpp> #include <graphlab/util/generics/conditional_addition_wrapper.hpp> #include <graphlab/options/graphlab_options.hpp> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/vertex_program/op_plus_eq_concept.hpp> #include 
<graphlab/graph/local_graph.hpp> #include <graphlab/graph/dynamic_local_graph.hpp> #include <graphlab/graph/graph_gather_apply.hpp> #include <graphlab/graph/ingress/distributed_ingress_base.hpp> #include <graphlab/graph/ingress/distributed_oblivious_ingress.hpp> #include <graphlab/graph/ingress/distributed_hdrf_ingress.hpp> #include <graphlab/graph/ingress/distributed_random_ingress.hpp> #include <graphlab/graph/ingress/distributed_identity_ingress.hpp> #include <graphlab/graph/ingress/sharding_constraint.hpp> #include <graphlab/graph/ingress/distributed_constrained_random_ingress.hpp> #include <graphlab/graph/graph_hash.hpp> #include <graphlab/util/hopscotch_map.hpp> #include <graphlab/util/fs_util.hpp> #include <graphlab/util/hdfs.hpp> #include <graphlab/graph/builtin_parsers.hpp> #include <graphlab/graph/vertex_set.hpp> #include <graphlab/macros_def.hpp> namespace tests { class distributed_graph_test; } namespace graphlab { /** \brief A directed graph datastructure which is distributed across * multiple machines. * * This class implements a distributed directed graph datastructure where * vertices and edges may contain arbitrary user-defined datatypes as * templatized by the VertexData and EdgeData template parameters. * * ### Initialization * * To declare a distributed graph you write: * \code typedef * graphlab::distributed_graph<vdata, edata> graph_type; * graph_type graph(dc, clopts); * \endcode * where <code>vdata</code> is the type of data to be * stored on vertices, and <code>edata</code> is the type of data to be * stored on edges. The constructor must be called simultaneously on all * machines. <code>dc</code> is a graphlab::distributed_control object that * must be constructed at the start of the program, and clopts is a * graphlab::graphlab_options object that is used to pass graph * construction runtime options to the graph. See the code examples for * further details. 
* * Each vertex is uniquely identified by an unsigned numeric ID of the type * graphlab::vertex_id_type. Vertex IDs need not be sequential. However, the * ID corresponding to <code>(vertex_id_type)(-1)</code> is reserved. (This * is the largest possible ID, corresponding to 0xFFFFFFFF when using 32-bit * IDs). * * Edges are not numbered, but are uniquely identified by its source->target * pair. In other words, there can only be two edges between any pair of * vertices, the edge going in the forward direction, and the edge going in * the backward direction. * * ### Construction * * The distributed graph can be constructed in two different ways. The * first, and the preferred method, is to construct the graph from files * located on a shared filesystem (NFS mounts for instance) , or from files * on HDFS (HDFS support must be compiled). * * To construct from files, the load_format() function provides built-in * parsers to construct the graph structure from various graph file formats * on disk or HDFS. Alternatively, the * \ref load(const std::string& path, line_parser_type line_parser) "load()" * function provides generalized parsing capabilities * allowing you to construct from your own defined file format. * Alternatively, load_binary() may be used to perform an extremely rapid * load of a graph previously saved with save_binary(). The caveat being that * the number of machines used to save the graph must match the number of * machines used to load the graph. * * The second construction strategy is to call the add_vertex() and * add_edge() functions directly. These functions are parallel reentrant, and * are also distributed. Each vertex and each edge should be added no more * than once across all machines. * * add_vertex() calls are not strictly required since add_edge(i, j) will * implicitly construct vertices i and j. The data on these vertices * will be default constructed. 
* * ### Finalization * After all vertices and edges are inserted into the graph * via either load from file functions or direct calls to add_vertex() and * add_edge(), for the graph to the useable, it must be finalized. * * This is performed by calling \code graph.finalize(); \endcode on all * machines simultaneously. None of the load* functions perform finalization * so multiple load operations could be performed (reading from different * file groups) before finalization. * * The finalize() operation partitions the graph and synchronizes all * internal graph datastructures. After this point, all graph computation * operations such as engine, map_reduce and transform operations will * function. * * ### Partitioning Strategies * * The graph is partitioned across the machines using a "vertex separator" * strategy where edges are assigned to machines, while vertices may span * multiple machines. There are three partitioning strategies implemented. * These can be selected by setting --graph_opts="ingress=[partition_method]" * on the command line. * \li \c "random" The most naive and the fastest partitioner. Random places * edges on machines. * \li \c "oblivious" Runs at roughly half the speed of random. Machines * indepedently partitions the segment of the graph it * read. Improves partitioning quality and will reduce * runtime memory consumption. * * \li \c "grid" Runs at rouphly the same speed of random. Randomly places * edges on machines with a grid constraint. * This obtains quality partition, close to oblivious, * but currently only works with perfect square number of machines. * * \li \c "pds" Runs at roughly the speed of random. Randomly places * edges on machines with a sparser constraint generated by * perfect difference set. This obtains the highest quality partition, * reducing runtime memory consumption significantly, without load-time penalty. * Currently only works with p^2+p+1 number of machines (p prime). 
* * \li \c "hdrf" Runs at roughly the speed of oblivious. * HDRF provides the smallest average replication factor with close to optimal load balance. * The HDRF algorithm is extensively described in the following publication: * F. Petroni, L. Querzoni, K. Daudjee, S. Kamali and G. Iacoboni: * "HDRF: Stream-Based Partitioning for Power-Law Graphs". * CIKM, 2015. * * ### Referencing Vertices / Edges Many GraphLab operations will pass around * vertex_type and edge_type objects. These objects are light-weight copyable * opaque references to vertices and edges in the distributed graph. The * vertex_type object provides capabilities such as: * \li \c vertex_type::id() Returns the ID of the vertex * \li \c vertex_type::num_in_edges() Returns the number of in edges * \li \c vertex_type::num_out_edges() Returns the number of out edges * \li \c vertex_type::data() Returns a <b>reference</b> to the data on * the vertex * * No traversal operations are currently provided and there there is no * single method to return a list of adjacent edges to the vertex. * * The edge_type object has similar capabilities: * \li \c edge_type::data() Returns a <b>reference</b> to the data on the edge * \li \c edge_type::source() Returns a \ref vertex_type of the source vertex * \li \c edge_type::target() Returns a \ref vertex_type of the target vertex * * This permits the use of <code>edge.source().data()</code> for instance, to * obtain the vertex data on the source vertex. * * See the documentation for \ref vertex_type and \ref edge_type for further * details. * * Due to the distributed nature of the graph, There is at the moment, no way * to obtain a reference to arbitrary vertices or edges. The only way to * obtain a reference to vertices or edges, is if one is passed to you via a * callback (for instance in map_reduce_vertices() / map_reduce_edges() or in * an update function). 
To manipulate the graph at a more fine-grained level * will require a more intimate understanding of the underlying distributed * graph representation. * * ### Saving the graph * After computation is complete, the graph structure can be saved * via save_format() which provides built-in writers to write various * graph formats to disk or HDFS. Alternatively, * \ref save(const std::string& prefix, writer writer, bool gzip, bool save_vertex, bool save_edge, size_t files_per_machine) "save()" * provides generalized writing capabilities allowing you to write * your own graph output to disk or HDFS. * * ### Distributed Representation * The graph is partitioned over machines using vertex separators. * In other words, each edge is assigned to a unique machine while * vertices are allowed to span multiple machines. * * The image below demonstrates the procedure. The example graph * on the left is to be separated among 4 machines where the cuts * are denoted by the dotted red lines. After partitioning, * (the image on the right), each vertex along the cut * is now separated among multiple machines. For instance, the * central vertex spans 4 different machines. * * \image html partition_fig.gif * * Each vertex which span multiple machines, has a <b>master</b> * machine (a black vertex), and all other instances of the vertex * are called <b>mirrors</b>. For instance, we observe that the central * vertex spans 4 machines, where machine 3 holds the <b>master</b> * copy, while all remaining machines hold <b>mirrored</b> copies. * * This concept of vertex separators allow us to easily manage large * power-law graphs where vertices may have extremely high degrees, * since the adjacency information for even the high degree vertices * can be separated across multiple machines. * * ### Internal Representation * \warning This is only useful if you plan to make use of the graph * in ways which exceed the provided abstractions. 
* * Each machine maintains its local section of the graph in a * graphlab::local_graph object. The local_graph object assigns * each vertex a sequential vertex ID called the local vertex ID. * A hash table is used to provide a mapping between the local vertex IDs * and their corresponding global vertex IDs. Additionally, each local * vertex is associated with a \ref vertex_record which provides information * about global ID of the vertex, the machine which holds the master instance * of the vertex, as well as a list of all machines holding a mirror * of the vertex. * * To support traversal of the local graph, two additional types, the * \ref local_vertex_type and the \ref local_edge_type is provided which * provide references to vertices and edges on the local graph. These behave * similarly to the \ref vertex_type and \ref edge_type types and have * similar functionality. However, since these reference the local graph, * there is substantially more flexility. In particular, the function * l_vertex() may be used to obtain a reference to a local vertex from a * local vertex ID. Also unlike the \ref vertex_type , the \ref * local_vertex_type support traversal operations such as returning a list of * all in_edges (local_vertex_type::in_edges()). However, the list only * contains the edges which are local to the current machine. See * \ref local_vertex_type and \ref local_edge_type for more details. * * * \tparam VertexData Type of data stored on vertices. Must be * Copyable, Default Constructable, Copy * Constructable and \ref sec_serializable. * \tparam EdgeData Type of data stored on edges. Must be * Copyable, Default Constructable, Copy * Constructable and \ref sec_serializable. */ template<typename VertexData, typename EdgeData> class distributed_graph { public: /// The type of the vertex data stored in the graph typedef VertexData vertex_data_type; /** * \cond GRAPHLAB_INTERNAL * \brief GraphLab Requires that vertex data be default * constructible. 
That is you must have a public member: * * \code * class vertex_data { * public: * vertex_data() { } * }; // * \endcode */ BOOST_CONCEPT_ASSERT((boost::DefaultConstructible<VertexData>)); /// \endcond /** * \cond GRAPHLAB_INTERNAL * \brief GraphLab Requires that vertex data be default * Serializable. */ BOOST_CONCEPT_ASSERT((graphlab::Serializable<VertexData>)); /// \endcond /// The type of the edge data stored in the graph typedef EdgeData edge_data_type; /** * \cond GRAPHLAB_INTERNAL * \brief GraphLab Requires that edge data be default * constructible. That is you must have a public member: * * \code * class edge_data { * public: * edge_data() { } * }; // * \endcode */ BOOST_CONCEPT_ASSERT((boost::DefaultConstructible<EdgeData>)); /// \endcond /** * \cond GRAPHLAB_INTERNAL * \brief GraphLab Requires that edge data be default * Serializable. */ BOOST_CONCEPT_ASSERT((graphlab::Serializable<EdgeData>)); /// \endcond /** The line parse is any function (or functor) that has the form: <code> bool line_parser(distributed_graph& graph, const std::string& filename, const std::string& textline); </code> the line parser returns true if the line is parsed successfully and calls graph.add_vertex(...) or graph.add_edge(...) See \ref graphlab::distributed_graph::load(std::string path, line_parser_type line_parser) "load()" for details. 
     */
    typedef boost::function<bool(distributed_graph&, const std::string&,
                                 const std::string&)> line_parser_type;

    // One bit per process: records which machines hold a copy of a vertex.
    typedef fixed_dense_bitset<RPC_MAX_N_PROCS> mirror_type;

    /// The type of the local graph used to store the graph data
#ifdef USE_DYNAMIC_LOCAL_GRAPH
    typedef graphlab::dynamic_local_graph<VertexData, EdgeData> local_graph_type;
#else
    typedef graphlab::local_graph<VertexData, EdgeData> local_graph_type;
#endif
    typedef graphlab::distributed_graph<VertexData, EdgeData> graph_type;
    // One spinlock per local vertex (resized in finalize()).
    typedef std::vector<simple_spinlock> lock_manager_type;

    friend class distributed_ingress_base<VertexData, EdgeData>;

    // Make friends with graph operation classes
    template <typename Graph, typename GatherType>
    friend class graph_gather_apply;

    // Make friends with Ingress classes
    friend class distributed_random_ingress<VertexData, EdgeData>;
    friend class distributed_identity_ingress<VertexData, EdgeData>;
    friend class distributed_oblivious_ingress<VertexData, EdgeData>;
    friend class distributed_hdrf_ingress<VertexData, EdgeData>;
    friend class distributed_constrained_random_ingress<VertexData, EdgeData>;

    typedef graphlab::vertex_id_type vertex_id_type;
    typedef graphlab::lvid_type lvid_type;
    typedef graphlab::edge_id_type edge_id_type;

    // Forward declarations of the reference types defined below.
    struct vertex_type;
    typedef bool edge_list_type;
    class edge_type;

    struct local_vertex_type;
    struct local_edge_list_type;
    class local_edge_type;

    /**
     * \brief Vertex object which provides access to the vertex data
     * and information about the vertex.
     *
     * The vertex_type object may be copied and has very little internal
     * state. It behaves as a reference to location of the vertex
     * in the internal graph representation. While vertex_type may be copied
     * it must not outlive the underlying graph.
     */
    struct vertex_type {
      typedef distributed_graph graph_type;
      distributed_graph& graph_ref;  // parent graph (referenced, not owned)
      lvid_type lvid;                // local vertex ID within graph_ref

      /// \cond GRAPHLAB_INTERNAL
      /** \brief Constructs a vertex_type object with local vid
       * lvid. This function should not be used directly. Use
       * distributed_graph::vertex() or distributed_graph::l_vertex()
       *
       * \param graph_ref A reference to the parent graph object
       * \param lvid The local VID of the vertex to be accessed
       */
      vertex_type(distributed_graph& graph_ref, lvid_type lvid):
          graph_ref(graph_ref), lvid(lvid) { }
      /// \endcond

      /// \brief Compares two vertex_type's for equality. Returns true
      //  if they reference the same vertex and false otherwise.
      bool operator==(vertex_type& v) const {
        return lvid == v.lvid;
      }

      /// \brief Returns a constant reference to the data on the vertex
      const vertex_data_type& data() const {
        return graph_ref.get_local_graph().vertex_data(lvid);
      }

      /// \brief Returns a mutable reference to the data on the vertex
      vertex_data_type& data() {
        return graph_ref.get_local_graph().vertex_data(lvid);
      }

      /// \brief Returns the number of in edges of the vertex
      size_t num_in_edges() const {
        return graph_ref.l_get_vertex_record(lvid).num_in_edges;
      }

      /// \brief Returns the number of out edges of the vertex
      size_t num_out_edges() const {
        return graph_ref.l_get_vertex_record(lvid).num_out_edges;
      }

      /// \brief Returns the vertex ID of the vertex
      vertex_id_type id() const {
        return graph_ref.global_vid(lvid);
      }

      /// \cond GRAPHLAB_INTERNAL
      /// \brief Returns a list of in edges (not implemented)
      edge_list_type in_edges() __attribute__ ((noreturn)) {
        ASSERT_TRUE(false);
      }

      /// \brief Returns a list of out edges (not implemented)
      edge_list_type out_edges() __attribute__ ((noreturn)) {
        ASSERT_TRUE(false);
      }
      /// \endcond

      /**
       * \brief Returns the local ID of the vertex
       */
      lvid_type local_id() const {
        return lvid;
      }
    };

    /**
     * \brief The edge represents an edge in the graph and provide
     * access to the data associated with that edge as well as the
     * source and target distributed::vertex_type objects.
     *
     * An edge object may be copied and has very little internal state
     * and essentially only a reference to the location of the edge
     * information in the underlying graph.
       Therefore edge objects
     * can be copied but must not outlive the underlying graph.
     */
    class edge_type {
    private:
      /** \brief An internal reference to the underlying graph */
      distributed_graph& graph_ref;
      /** \brief The edge in the local graph */
      typename local_graph_type::edge_type edge;

      /**
       * \internal
       * \brief Constructs a edge_type object from a edge_type
       * object of the graphlab::local_graph.
       * lvid. This function should not be used directly.
       *
       * \param graph_ref A reference to the parent graph object
       * \param edge The local graph's edge_type to access
       */
      edge_type(distributed_graph& graph_ref,
                typename local_graph_type::edge_type edge):
          graph_ref(graph_ref), edge(edge) { }
      friend class distributed_graph;

    public:
      /**
       * \internal
       * Unimplemented default constructor to help with
       * various type inference needs
       */
      edge_type();

      /**
       * \brief Returns the source vertex of the edge.
       * This function returns a vertex_object by value and as a
       * consequence it is possible to use the resulting vertex object
       * to access and *modify* the associated vertex data.
       *
       * Modification of vertex data obtained through an edge object
       * is *usually not safe* and can lead to data corruption.
       *
       * \return The vertex object representing the source vertex.
       */
      vertex_type source() const {
        // NOTE(review): edge.source().id() is the local-graph endpoint ID,
        // which is what vertex_type expects as its lvid argument; confirm
        // against local_graph's edge_type.
        return vertex_type(graph_ref, edge.source().id());
      }

      /**
       * \brief Returns the target vertex of the edge.
       *
       * This function returns a vertex_object by value and as a
       * consequence it is possible to use the resulting vertex object
       * to access and *modify* the associated vertex data.
       *
       * Modification of vertex data obtained through an edge object
       * is *usually not safe* and can lead to data corruption.
       *
       * \return The vertex object representing the target vertex.
       */
      vertex_type target() const {
        return vertex_type(graph_ref, edge.target().id());
      }

      /**
       * \brief Returns a constant reference to the data on the edge
       */
      const edge_data_type& data() const { return edge.data(); }

      /**
       * \brief Returns a mutable reference to the data on the edge
       */
      edge_data_type& data() { return edge.data(); }

    }; // end of edge_type

    // CONSTRUCTORS ==========================================================>

    /**
     * Constructs a distributed graph. All machines must call this constructor
     * simultaneously.
     *
     * Value graph options are:
     * \li \c ingress The graph partitioning method to use. May be "random"
     *        "grid" or "pds". The methods have roughly the same runtime
     *        complexity, but increasing partition quality. "grid"
     *        requires number of machine P be able to layout as a n*m = P
     *        grid with ( |m-n| <= 2). "pds" uses requires P = p^2+p+1 where
     *        p is a prime number.
     *
     * \li \c userecent An optimization that can decrease memory utilization
     *        of oblivious and batch quite significantly (especially
     *        when there are a large number of machines) at a small
     *        partitioning penalty. Defaults to 0. Set to 1 to
     *        enable.
     * \li \c bufsize The batch size used by the batch ingress method.
     *        Defaults to 50,000. Increasing this number will
     *        decrease partitioning time with a penalty to partitioning
     *        quality.
     *
     * \param [in] dc Distributed controller to associate with
     * \param [in] opts A graphlab::graphlab_options object specifying engine
     *                  parameters. This is typically constructed using
     *                  \ref graphlab::command_line_options.
     */
    distributed_graph(distributed_control& dc,
                      const graphlab_options& opts = graphlab_options()) :
        rpc(dc, this), finalized(false), vid2lvid(),
        nverts(0), nedges(0), local_own_nverts(0), nreplicas(0),
        ingress_ptr(NULL),
#ifdef _OPENMP
        // With OpenMP, the vertex exchange keeps one buffer per thread.
        vertex_exchange(dc, omp_get_max_threads()),
#else
        vertex_exchange(dc),
#endif
        vset_exchange(dc), parallel_ingress(true) {
      rpc.barrier();
      set_options(opts);
    }

    // The graph owns ingress_ptr; release it on destruction.
    ~distributed_graph() {
      delete ingress_ptr;
      ingress_ptr = NULL;
    }

    // Accessor for the per-local-vertex spinlock array
    // (sized in finalize()).
    lock_manager_type& get_lock_manager() {
      return lock_manager;
    }

  private:
    // Parses graph-construction options ("ingress", "parallel_ingress",
    // and several deprecated knobs) and forwards the result to
    // set_ingress_method().
    void set_options(const graphlab_options& opts) {
      size_t bufsize = 50000;
      bool usehash = false;
      bool userecent = false;
      std::string ingress_method = "";
      std::vector<std::string> keys = opts.get_graph_args().get_option_keys();
      foreach(std::string opt, keys) {
        if (opt == "ingress") {
          opts.get_graph_args().get_option("ingress", ingress_method);
          // Only process 0 logs, to avoid duplicated output.
          if (rpc.procid() == 0)
            logstream(LOG_EMPH) << "Graph Option: ingress = "
                                << ingress_method << std::endl;
        }
        else if (opt == "parallel_ingress") {
          opts.get_graph_args().get_option("parallel_ingress", parallel_ingress);
          if (!parallel_ingress && rpc.procid() == 0)
            logstream(LOG_EMPH) << "Disable parallel ingress. Graph will be streamed through one node." << std::endl;
        }
        /**
         * These options below are deprecated.
*/
    else if (opt == "bufsize") {
      opts.get_graph_args().get_option("bufsize", bufsize);
      if (rpc.procid() == 0)
        logstream(LOG_EMPH) << "Graph Option: bufsize = "
                            << bufsize << std::endl;
    } else if (opt == "usehash") {
      opts.get_graph_args().get_option("usehash", usehash);
      if (rpc.procid() == 0)
        logstream(LOG_EMPH) << "Graph Option: usehash = "
                            << usehash << std::endl;
    } else if (opt == "userecent") {
      opts.get_graph_args().get_option("userecent", userecent);
      if (rpc.procid() == 0)
        logstream(LOG_EMPH) << "Graph Option: userecent = "
                            << userecent << std::endl;
    } else {
      logstream(LOG_ERROR) << "Unexpected Graph Option: " << opt << std::endl;
    }
  }
  set_ingress_method(ingress_method, bufsize, usehash, userecent);
}

public:

// METHODS ===============================================================>

/// Returns true if the underlying local graph supports dynamic modification.
bool is_dynamic() const {
  return local_graph.is_dynamic();
}

/**
 * \brief Commits the graph structure. Once a graph is finalized it may
 * no longer be modified. Must be called on all machines simultaneously.
 *
 * Finalize is used to complete graph ingress by resolving vertex
 * ownership and completing local data structures. Once a graph is finalized
 * its structure may not be modified. Repeated calls to finalize() do
 * nothing.
 */
void finalize() {
#ifndef USE_DYNAMIC_LOCAL_GRAPH
  // Static local graphs only need to be finalized once.
  if (finalized) return;
#endif
  ASSERT_NE(ingress_ptr, NULL);
  logstream(LOG_INFO) << "Distributed graph: enter finalize" << std::endl;
  ingress_ptr->finalize();
  lock_manager.resize(num_local_vertices());
  rpc.barrier();
  finalized = true;
}

/// \brief Returns true if the graph is finalized.
bool is_finalized() {
  return finalized;
}

/** \brief Get the number of vertices */
size_t num_vertices() const { return nverts; }

/** \brief Get the number of edges */
size_t num_edges() const { return nedges; }

/** \brief converts a vertex ID to a vertex object. This function should
 * not be used without a deep understanding of the distributed graph
 * representation.
* * This functions converts a global vertex ID to a vertex_type object. * The global vertex ID must exist on this machine or assertion failures * will be produced. */ vertex_type vertex(vertex_id_type vid) { return vertex_type(*this, local_vid(vid)); } /// \cond GRAPHLAB_INTERNAL /** \brief Get a list of all in edges of a given vertex ID. Not Implemented */ edge_list_type in_edges(const vertex_id_type vid) const __attribute__((noreturn)) { // Not implemented. logstream(LOG_WARNING) << "in_edges not implemented. " << std::endl; ASSERT_TRUE(false); } /** Get a list of all out edges of a given vertex ID. Not Implemented */ edge_list_type out_edges(const vertex_id_type vid) const __attribute__((noreturn)) { // Not implemented. logstream(LOG_WARNING) << "in_edges not implemented. " << std::endl; ASSERT_TRUE(false); } /// \endcond /** * \brief Returns the number of in edges of a given global vertex ID. This * function should not be used without a deep understanding of the * distributed graph representation. * * Returns the number of in edges of a given vertex ID. Equivalent to * vertex(vid).num_in_edges(). The global vertex ID must exist on this * machine or assertion failures will be produced. */ size_t num_in_edges(const vertex_id_type vid) const { return get_vertex_record(vid).num_in_edges; } /** * \brief Returns the number of out edges of a given global vertex ID. This * function should not be used without a deep understanding of the * distributed graph representation. * * Returns the number of out edges of a given vertex ID. Equivalent to * vertex(vid).num_out_edges(). The global vertex ID must exist on this * machine or assertion failures will be produced. */ size_t num_out_edges(const vertex_id_type vid) const { return get_vertex_record(vid).num_out_edges; } /** * Defines the strategy to use when duplicate vertices are inserted. * The default behavior is that an arbitrary vertex data is picked. * This allows you to define a combining strategy. 
*/
void set_duplicate_vertex_strategy(
    boost::function<void(vertex_data_type&,
                         const vertex_data_type&)> combine_strategy) {
  // Forwarded to the ingress object, which applies the combiner when the
  // same vertex ID is added more than once.
  ingress_ptr->set_duplicate_vertex_strategy(combine_strategy);
}

/**
 * \brief Creates a vertex containing the vertex data.
 *
 * Creates a vertex with a particular vertex ID and containing a
 * particular vertex data. Vertex IDs need not be sequential, and
 * may arbitrarily span the unsigned integer range of vertex_id_type
 * with the exception of (vertex_id_type)(-1), or corresponding to
 * 0xFFFFFFFF on 32-bit vertex IDs.
 *
 * This function is parallel and distributed. i.e. It does not matter which
 * machine, or which thread on which machines calls add_vertex() for a
 * particular ID.
 *
 * However, each vertex may only be added exactly once.
 *
 * Returns true if successful, returns false if a vertex with id (-1)
 * was added.
 */
bool add_vertex(const vertex_id_type& vid,
                const VertexData& vdata = VertexData() ) {
#ifndef USE_DYNAMIC_LOCAL_GRAPH
  // Static graphs cannot grow after finalization; fail loudly.
  if(finalized) {
    logstream(LOG_FATAL)
      << "\n\tAttempting to add a vertex to a finalized graph."
      << "\n\tVertices cannot be added to a graph after finalization."
      << std::endl;
  }
#else
  // Dynamic graphs revert to the un-finalized state on modification.
  finalized = false;
#endif
  if(vid == vertex_id_type(-1)) {
    logstream(LOG_ERROR)
      << "\n\tAdding a vertex with id -1 is not allowed."
      << "\n\tThe -1 vertex id is reserved for internal use."
      << std::endl;
    return false;
  }
  ASSERT_NE(ingress_ptr, NULL);
  ingress_ptr->add_vertex(vid, vdata);
  return true;
}

/**
 * \brief Creates an edge connecting vertex source, and vertex target().
 *
 * Creates a edge connecting two vertex IDs.
 *
 * This function is parallel and distributed. i.e. It does not matter which
 * machine, or which thread on which machines calls add_edge() for a
 * particular ID.
 *
 * However, each edge direction may only be added exactly once. i.e.
 * if edge 5->6 is added already, no other calls to add edge 5->6 should be
 * made.
 *
 * Returns true on success.
Returns false if it is a self-edge, or if
 * we are trying to create a vertex with ID (vertex_id_type)(-1).
 */
bool add_edge(vertex_id_type source, vertex_id_type target,
              const EdgeData& edata = EdgeData()) {
#ifndef USE_DYNAMIC_LOCAL_GRAPH
  // Static graphs cannot grow after finalization; fail loudly.
  if(finalized) {
    logstream(LOG_FATAL)
      << "\n\tAttempting to add an edge to a finalized graph."
      << "\n\tEdges cannot be added to a graph after finalization."
      << std::endl;
  }
#else
  // Dynamic graphs revert to the un-finalized state on modification.
  finalized = false;
#endif
  // Reject the reserved (-1) vertex ID on either endpoint.
  if(source == vertex_id_type(-1)) {
    logstream(LOG_ERROR)
      << "\n\tThe source vertex with id vertex_id_type(-1)\n"
      << "\tor unsigned value " << vertex_id_type(-1) << " in edge \n"
      << "\t(" << source << "->" << target << ") is not allowed.\n"
      << "\tThe -1 vertex id is reserved for internal use."
      << std::endl;
    return false;
  }
  if(target == vertex_id_type(-1)) {
    logstream(LOG_ERROR)
      << "\n\tThe target vertex with id vertex_id_type(-1)\n"
      << "\tor unsigned value " << vertex_id_type(-1) << " in edge \n"
      << "\t(" << source << "->" << target << ") is not allowed.\n"
      << "\tThe -1 vertex id is reserved for internal use."
      << std::endl;
    return false;
  }
  // Self-edges are not representable in this graph model.
  if(source == target) {
    logstream(LOG_ERROR)
      << "\n\tTrying to add self edge (" << source << "->" << target << ")."
      << "\n\tSelf edges are not allowed."
      << std::endl;
    return false;
  }
  ASSERT_NE(ingress_ptr, NULL);
  ingress_ptr->add_edge(source, target, edata);
  return true;
}

/**
 * \brief Performs a map-reduce operation on each vertex in the
 * graph returning the result.
 *
 * Given a map function, map_reduce_vertices() call the map function on all
 * vertices in the graph. The return values are then summed together and the
 * final result returned. The map function should only read the vertex data
 * and should not make any modifications. map_reduce_vertices() must be
 * called on all machines simultaneously.
 *
 * ### Basic Usage
 * For instance, if the graph has float vertex data, and float edge data:
 * \code
 *   typedef graphlab::distributed_graph<float, float> graph_type;
 * \endcode
 *
 * To compute an absolute sum over all the vertex data, we would write
 * a function which reads in each vertex, and returns the absolute
 * value of the data on the vertex.
 * \code
 * float absolute_vertex_data(const graph_type::vertex_type& vertex) {
 *   return std::fabs(vertex.data());
 * }
 * \endcode
 * After which calling:
 * \code
 * float sum = graph.map_reduce_vertices<float>(absolute_vertex_data);
 * \endcode
 * will call the <code>absolute_vertex_data()</code> function
 * on each vertex in the graph. <code>absolute_vertex_data()</code>
 * reads the value of the vertex and returns the absolute result.
 * These return values are then summed together and returned.
 * All machines see the same result.
 *
 * The template argument <code><float></code> is needed to inform
 * the compiler regarding the return type of the mapfunction.
 *
 * The optional argument vset can be used to restrict the set of vertices
 * map-reduced over.
 *
 * ### Relations
 * This function is similar to
 * graphlab::iengine::map_reduce_vertices()
 * with the difference that this does not take a context
 * and thus cannot influence engine signalling.
 * transform_vertices() can be used to perform a similar
 * but may also make modifications to graph data.
 *
 * \tparam ReductionType The output of the map function. Must have
 *                    operator+= defined, and must be \ref sec_serializable.
 * \tparam VertexMapperType The type of the map function.
 *                          Not generally needed.
 *                          Can be inferred by the compiler.
 * \param mapfunction The map function to use. Must take
 *                   a \ref vertex_type, or a reference to a
 *                   \ref vertex_type as its only argument.
 *                   Returns a ReductionType which must be summable
 *                   and \ref sec_serializable .
 * \param vset The set of vertices to map reduce over. Optional. Defaults to
 *             complete_set()
 */
template <typename ReductionType, typename MapFunctionType>
ReductionType map_reduce_vertices(MapFunctionType mapfunction,
                                  const vertex_set& vset = complete_set()) {
  BOOST_CONCEPT_ASSERT((graphlab::Serializable<ReductionType>));
  BOOST_CONCEPT_ASSERT((graphlab::OpPlusEq<ReductionType>));
  if(!finalized) {
    logstream(LOG_FATAL)
      << "\n\tAttempting to run graph.map_reduce_vertices(...) "
      << "\n\tbefore calling graph.finalize()."
      << std::endl;
  }
  rpc.barrier();
  bool global_result_set = false;
  ReductionType global_result = ReductionType();
#ifdef _OPENMP
#pragma omp parallel
#endif
  {
    // Per-thread partial accumulator; result_set distinguishes "no vertex
    // mapped yet" from a mapped value, so ReductionType() need not be the
    // identity of operator+=.
    bool result_set = false;
    ReductionType result = ReductionType();
#ifdef _OPENMP
#pragma omp for
#endif
    for (int i = 0; i < (int)local_graph.num_vertices(); ++i) {
      // Only map a vertex on its owning machine so each vertex is counted
      // exactly once across the cluster.
      if (lvid2record[i].owner == rpc.procid() &&
          vset.l_contains((lvid_type)i)) {
        if (!result_set) {
          const vertex_type vtx(l_vertex(i));
          result = mapfunction(vtx);
          result_set = true;
        } else if (result_set){
          const vertex_type vtx(l_vertex(i));
          const ReductionType tmp = mapfunction(vtx);
          result += tmp;
        }
      }
    }
#ifdef _OPENMP
#pragma omp critical
#endif
    {
      // Merge the per-thread partials under a critical section.
      if (result_set) {
        if (!global_result_set) {
          global_result = result;
          global_result_set = true;
        } else {
          global_result += result;
        }
      }
    }
  }
  // Sum the per-machine partials across the cluster; the wrapper carries
  // the "is set" flag so empty machines do not perturb the sum.
  conditional_addition_wrapper<ReductionType>
    wrapper(global_result, global_result_set);
  rpc.all_reduce(wrapper);
  return wrapper.value;
} // end of map_reduce_vertices

/**
 * \brief Performs a map-reduce operation on each edge in the
 * graph returning the result.
 *
 * Given a map function, map_reduce_edges() call the map function on all
 * edges in the graph. The return values are then summed together and the
 * final result returned. The map function should only read data
 * and should not make any modifications. map_reduce_edges() must be
 * called on all machines simultaneously.
* * ### Basic Usage * For instance, if the graph has float vertex data, and float edge data: * \code * typedef graphlab::distributed_graph<float, float> graph_type; * \endcode * * To compute an absolute sum over all the edge data, we would write * a function which reads in each a edge, and returns the absolute * value of the data on the edge. * \code * float absolute_edge_datac(const graph_type::edge_type& edge) { * return std::fabs(edge.data()); * } * \endcode * After which calling: * \code * float sum = graph.map_reduce_edges<float>(absolute_edge_data); * \endcode * will call the <code>absolute_edge_data()</code> function * on each edge in the graph. <code>absolute_edge_data()</code> * reads the value of the edge and returns the absolute result. * This return values are then summed together and returned. * All machines see the same result. * * The template argument <code><float></code> is needed to inform * the compiler regarding the return type of the mapfunction. * * The two optional arguments vset and edir can be used to restrict the * set of edges which are map-reduced over. * * ### Relations * This function similar to * graphlab::distributed_graph::map_reduce_edges() * with the difference that this does not take a context * and thus cannot influence engine signalling. * Finally transform_edges() can be used to perform a similar * but may also make modifications to graph data. * * \tparam ReductionType The output of the map function. Must have * operator+= defined, and must be \ref sec_serializable. * \tparam EdgeMapperType The type of the map function. * Not generally needed. * Can be inferred by the compiler. * \param mapfunction The map function to use. Must take * a \ref edge_type, or a reference to a * \ref edge_type as its only argument. * Returns a ReductionType which must be summable * and \ref sec_serializable . * \param vset A set of vertices. Combines with * edir to identify the set of edges. 
For instance, if * edir == IN_EDGES, map_reduce_edges will map over all in edges * of the vertices in vset. Optional. Defaults to complete_set(). * \param edir An edge direction. Combines with vset to identify the set * of edges to map over. For instance, if * edir == IN_EDGES, map_reduce_edges will map over all in edges * of the vertices in vset. Optional. Defaults to IN_EDGES. */ template <typename ReductionType, typename MapFunctionType> ReductionType map_reduce_edges(MapFunctionType mapfunction, const vertex_set& vset = complete_set(), edge_dir_type edir = IN_EDGES) { BOOST_CONCEPT_ASSERT((graphlab::Serializable<ReductionType>)); BOOST_CONCEPT_ASSERT((graphlab::OpPlusEq<ReductionType>)); if(!finalized) { logstream(LOG_FATAL) << "\n\tAttempting to run graph.map_reduce_vertices(...)" << "\n\tbefore calling graph.finalize()." << std::endl; } rpc.barrier(); bool global_result_set = false; ReductionType global_result = ReductionType(); #ifdef _OPENMP #pragma omp parallel #endif { bool result_set = false; ReductionType result = ReductionType(); #ifdef _OPENMP #pragma omp for #endif for (int i = 0; i < (int)local_graph.num_vertices(); ++i) { if (vset.l_contains((lvid_type)i)) { if (edir == IN_EDGES || edir == ALL_EDGES) { foreach(const local_edge_type& e, l_vertex(i).in_edges()) { if (!result_set) { edge_type edge(e); result = mapfunction(edge); result_set = true; } else if (result_set){ edge_type edge(e); const ReductionType tmp = mapfunction(edge); result += tmp; } } } if (edir == OUT_EDGES || edir == ALL_EDGES) { foreach(const local_edge_type& e, l_vertex(i).out_edges()) { if (!result_set) { edge_type edge(e); result = mapfunction(edge); result_set = true; } else if (result_set){ edge_type edge(e); const ReductionType tmp = mapfunction(edge); result += tmp; } } } } } #ifdef _OPENMP #pragma omp critical #endif { if (result_set) { if (!global_result_set) { global_result = result; global_result_set = true; } else { global_result += result; } } } } 
conditional_addition_wrapper<ReductionType> wrapper(global_result, global_result_set); rpc.all_reduce(wrapper); return wrapper.value; } // end of map_reduce_edges /** * \brief Performs a fold operation on each vertex in the * graph returning the result. * * Given a fold function, fold_vertices() call the fold function on all * vertices in the graph, passing around a aggregator variable. * The return values are then summed together across machines using the * combiner function and the final result returned. The fold function should * only read the vertex data and should not make any modifications. * fold_vertices() must be called on all machines simultaneously. * * ### Basic Usage * For instance, if the graph has float vertex data, and float edge data: * \code * typedef graphlab::distributed_graph<float, float> graph_type; * \endcode * * To compute an absolute sum over all the vertex data, we would write * a function which reads in each a vertex, and returns the absolute * value of the data on the vertex. * \code * void absolute_vertex_data(const graph_type::vertex_type& vertex, float& total) { * total += std::fabs(vertex.data()); * } * \endcode * After which calling: * \code * float sum = graph.fold_vertices<float>(absolute_vertex_data); * \endcode * will call the <code>absolute_vertex_data()</code> function * on each vertex in the graph. <code>absolute_vertex_data()</code> * reads the value of the vertex and returns the absolute result. * This return values are then summed together and returned. * All machines see the same result. * * The template argument <code><float></code> is needed to inform * the compiler regarding the return type of the fold function. * * The optional argument vset can be used to restrict he set of vertices * map-reduced over. * * Unlike map_reduce_vertices, this function exposes to a certain extent, * the internals of the parallelism structure since the fold is used within * a thread, but across threads/machines operator+= is used. 
The behavior * of the foldfunction, or the behavior of the return type should not make * assumptions on the undocumented behavior of this function (such as * when the fold is used, and when += is used). * * ### Relations * This function is similar to * map_reduce_vertices() * with the difference that this uses a fold and is hence more efficient * for large aggregation objects. * transform_vertices() can be used to perform a similar * but may also make modifications to graph data. * * \tparam ReductionType The output of the map function. Must have * operator+= defined, and must be \ref sec_serializable. * \tparam VertexFoldType The type of the fold function. * Not generally needed. * Can be inferred by the compiler. * \param foldfunction The fold function to use. Must take * a \ref vertex_type, or a reference to a * \ref vertex_type as its first argument, and a * reference to a ReductionType in its second argument. * \param vset The set of vertices to fold reduce over. Optional. Defaults to * complete_set() */ template <typename ReductionType, typename VertexFoldType> ReductionType fold_vertices(VertexFoldType foldfunction, const vertex_set& vset = complete_set()) { BOOST_CONCEPT_ASSERT((graphlab::Serializable<ReductionType>)); BOOST_CONCEPT_ASSERT((graphlab::OpPlusEq<ReductionType>)); if(!finalized) { logstream(LOG_FATAL) << "\n\tAttempting to run graph.map_reduce_vertices(...) " << "\n\tbefore calling graph.finalize()." 
      << std::endl;
  }
  rpc.barrier();
  bool global_result_set = false;
  ReductionType global_result = ReductionType();
#ifdef _OPENMP
#pragma omp parallel
#endif
  {
    // Per-thread accumulator folded into by foldfunction.
    ReductionType result = ReductionType();
#ifdef _OPENMP
#pragma omp for
#endif
    for (int i = 0; i < (int)local_graph.num_vertices(); ++i) {
      // Fold a vertex only on its owning machine so each vertex is
      // processed exactly once across the cluster.
      if (lvid2record[i].owner == rpc.procid() &&
          vset.l_contains((lvid_type)i)) {
        const vertex_type vtx(l_vertex(i));
        foldfunction(vtx, result);
      }
    }
#ifdef _OPENMP
#pragma omp critical
#endif
    {
      // Merge the per-thread partials with operator+= under a critical
      // section.
      if (!global_result_set) {
        global_result = result;
        global_result_set = true;
      } else {
        global_result += result;
      }
    }
  }
  // Sum the per-machine partials across the cluster.
  conditional_addition_wrapper<ReductionType>
    wrapper(global_result, global_result_set);
  rpc.all_reduce(wrapper);
  return wrapper.value;
}

/**
 * \brief Performs a fold operation on each edge in the
 * graph returning the result.
 *
 * Given a fold function, fold_edges() call the fold function on all
 * edges in the graph passing an aggregator.
 * The return values are then summed together across machines and
 * final result returned. The fold function should only read data
 * and should not make any modifications. fold_edges() must be
 * called on all machines simultaneously.
 *
 * ### Basic Usage
 * For instance, if the graph has float vertex data, and float edge data:
 * \code
 *   typedef graphlab::distributed_graph<float, float> graph_type;
 * \endcode
 *
 * To compute an absolute sum over all the edge data, we would write
 * a function which reads in each edge, and accumulates the absolute
 * value of the data on the edge.
 * \code
 * void absolute_edge_data(const graph_type::edge_type& edge, float& acc) {
 *   acc += std::fabs(edge.data());
 * }
 * \endcode
 * After which calling:
 * \code
 * float sum = graph.fold_edges<float>(absolute_edge_data);
 * \endcode
 * will call the <code>absolute_edge_data()</code> function
 * on each edge in the graph. <code>absolute_edge_data()</code>
 * reads the value of the edge and returns the absolute result.
 * These return values are then summed together and returned.
* All machines see the same result. * * The template argument <code><float></code> is needed to inform * the compiler regarding the return type of the foldfunction. * * The two optional arguments vset and edir can be used to restrict the * set of edges which are map-reduced over. * * ### Relations * This function similar to * graphlab::distributed_graph::map_reduce_edges() * with the difference that this uses a fold and is hence more efficient * for large aggregation objects. * Finally transform_edges() can be used to perform a similar * but may also make modifications to graph data. * * \tparam ReductionType The output of the map function. Must have * operator+= defined, and must be \ref sec_serializable. * \tparam EdgeFoldType The type of the Fold function. * Not generally needed. * Can be inferred by the compiler. * \param fold function The map function to use. Must take * a \ref edge_type, or a reference to a * \ref edge_type as its first argument, and * a reference to a ReductionType in its second argument. * \param vset A set of vertices. Combines with * edir to identify the set of edges. For instance, if * edir == IN_EDGES, map_reduce_edges will map over all in edges * of the vertices in vset. Optional. Defaults to complete_set(). * \param edir An edge direction. Combines with vset to identify the set * of edges to map over. For instance, if * edir == IN_EDGES, map_reduce_edges will map over all in edges * of the vertices in vset. Optional. Defaults to IN_EDGES. */ template <typename ReductionType, typename FoldFunctionType> ReductionType fold_edges(FoldFunctionType foldfunction, const vertex_set& vset = complete_set(), edge_dir_type edir = IN_EDGES) { BOOST_CONCEPT_ASSERT((graphlab::Serializable<ReductionType>)); BOOST_CONCEPT_ASSERT((graphlab::OpPlusEq<ReductionType>)); if(!finalized) { logstream(LOG_FATAL) << "\n\tAttempting to run graph.map_reduce_vertices(...)" << "\n\tbefore calling graph.finalize()." 
      << std::endl;
  }
  rpc.barrier();
  bool global_result_set = false;
  ReductionType global_result = ReductionType();
#ifdef _OPENMP
#pragma omp parallel
#endif
  {
    // Per-thread accumulator folded into by foldfunction.
    ReductionType result = ReductionType();
#ifdef _OPENMP
#pragma omp for
#endif
    for (int i = 0; i < (int)local_graph.num_vertices(); ++i) {
      if (vset.l_contains((lvid_type)i)) {
        if (edir == IN_EDGES || edir == ALL_EDGES) {
          foreach(const local_edge_type& e, l_vertex(i).in_edges()) {
            edge_type edge(e);
            foldfunction(edge, result);
          }
        }
        if (edir == OUT_EDGES || edir == ALL_EDGES) {
          foreach(const local_edge_type& e, l_vertex(i).out_edges()) {
            edge_type edge(e);
            foldfunction(edge, result);
          }
        }
      }
    }
#ifdef _OPENMP
#pragma omp critical
#endif
    {
      // Merge the per-thread partials with operator+= under a critical
      // section.
      if (!global_result_set) {
        global_result = result;
        global_result_set = true;
      } else {
        global_result += result;
      }
    }
  }
  // Sum the per-machine partials across the cluster.
  conditional_addition_wrapper<ReductionType>
    wrapper(global_result, global_result_set);
  rpc.all_reduce(wrapper);
  return wrapper.value;
} // end of fold_edges

/**
 * \brief Performs a transformation operation on each vertex in the graph.
 *
 * Given a mapfunction, transform_vertices() calls mapfunction on
 * every vertex in graph. The map function may make modifications
 * to the data on the vertex. transform_vertices() must be called by all
 * machines simultaneously.
 *
 * The optional vset argument may be used to restrict the set of vertices
 * operated upon.
 *
 * ### Basic Usage
 * For instance, if the graph has integer vertex data, and integer edge
 * data:
 * \code
 *   typedef graphlab::distributed_graph<size_t, size_t> graph_type;
 * \endcode
 *
 * To set each vertex value to be the number of out-going edges,
 * we may write the following function:
 * \code
 * void set_vertex_value(graph_type::vertex_type& vertex) {
 *   vertex.data() = vertex.num_out_edges();
 * }
 * \endcode
 *
 * Calling transform_vertices():
 * \code
 *   graph.transform_vertices(set_vertex_value);
 * \endcode
 * will run the <code>set_vertex_value()</code> function
 * on each vertex in the graph, setting its new value.
 *
 * ### Relations
 * map_reduce_vertices() provide similar signalling functionality,
 * but should not make modifications to graph data.
 * graphlab::iengine::transform_vertices() provide
 * the same graph modification capabilities, but with a context
 * and thus can perform signalling.
 *
 * \tparam VertexMapperType The type of the map function.
 *                          Not generally needed.
 *                          Can be inferred by the compiler.
 * \param mapfunction The map function to use. Must take
 *                   a \ref vertex_type, or a reference to a
 *                   \ref vertex_type as its only argument.
 *                   (Unlike graphlab::iengine::transform_vertices(),
 *                   no icontext_type is passed.) Returns void.
 * \param vset The set of vertices to transform. Optional. Defaults to
 *             complete_set()
 */
template <typename TransformType>
void transform_vertices(TransformType transform_functor,
                        const vertex_set vset = complete_set()) {
  if(!finalized) {
    logstream(LOG_FATAL)
      << "\n\tAttempting to call graph.transform_vertices(...)"
      << "\n\tbefore finalizing the graph."
      << std::endl;
  }
  rpc.barrier();
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int i = 0; i < (int)local_graph.num_vertices(); ++i) {
    // Transform a vertex only on its owning machine; mirrors are updated
    // by the synchronize() below.
    if (lvid2record[i].owner == rpc.procid() &&
        vset.l_contains((lvid_type)i)) {
      vertex_type vtx(l_vertex(i));
      transform_functor(vtx);
    }
  }
  rpc.barrier();
  // Propagate the modified master vertex data to all replicas.
  synchronize();
}

/**
 * \brief Performs a transformation operation on each edge in the graph.
 *
 * Given a mapfunction, transform_edges() calls mapfunction on
 * every edge in graph. The map function may make modifications
 * to the data on the edge. transform_edges() must be called on
 * all machines simultaneously.
 *
 * ### Basic Usage
 * For instance, if the graph has integer vertex data, and integer edge
 * data:
 * \code
 *   typedef graphlab::distributed_graph<size_t, size_t> graph_type;
 * \endcode
 *
 * To set each edge value to be the number of out-going edges
 * of the target vertex, we may write the following:
 * \code
 * void set_edge_value(graph_type::edge_type& edge) {
 *   edge.data() = edge.target().num_out_edges();
 * }
 * \endcode
 *
 * Calling transform_edges():
 * \code
 *   graph.transform_edges(set_edge_value);
 * \endcode
 * will run the <code>set_edge_value()</code> function
 * on each edge in the graph, setting its new value.
 *
 * The two optional arguments vset and edir may be used to restrict
 * the set of edges operated upon.
 *
 * ### Relations
 * map_reduce_edges() provide similar signalling functionality,
 * but should not make modifications to graph data.
 * graphlab::iengine::transform_edges() provide
 * the same graph modification capabilities, but with a context
 * and thus can perform signalling.
 *
 * \tparam EdgeMapperType The type of the map function.
 *                        Not generally needed.
 *                        Can be inferred by the compiler.
 * \param mapfunction The map function to use. Must take
 *                   a \ref edge_type, or a reference to a
 *                   \ref edge_type as its only argument.
 *                   (Unlike graphlab::iengine::transform_edges(),
 *                   no icontext_type is passed.) Returns void.
 * \param vset A set of vertices. Combines with
 *             edir to identify the set of edges. For instance, if
 *             edir == IN_EDGES, map_reduce_edges will map over all in edges
 *             of the vertices in vset. Optional. Defaults to complete_set().
 * \param edir An edge direction. Combines with vset to identify the set
 *             of edges to map over. For instance, if
 *             edir == IN_EDGES, map_reduce_edges will map over all in edges
 *             of the vertices in vset. Optional. Defaults to IN_EDGES.
*/
template <typename TransformType>
void transform_edges(TransformType transform_functor,
                     const vertex_set& vset = complete_set(),
                     edge_dir_type edir = IN_EDGES) {
  if(!finalized) {
    logstream(LOG_FATAL)
      << "\n\tAttempting to call graph.transform_edges(...)"
      << "\n\tbefore finalizing the graph."
      << std::endl;
  }
  rpc.barrier();
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int i = 0; i < (int)local_graph.num_vertices(); ++i) {
    if (vset.l_contains((lvid_type)i)) {
      if (edir == IN_EDGES || edir == ALL_EDGES) {
        foreach(const local_edge_type& e, l_vertex(i).in_edges()) {
          edge_type edge(e);
          transform_functor(edge);
        }
      }
      if (edir == OUT_EDGES || edir == ALL_EDGES) {
        foreach(const local_edge_type& e, l_vertex(i).out_edges()) {
          edge_type edge(e);
          transform_functor(edge);
        }
      }
    }
  }
  rpc.barrier();
}

// disable documentation for parallel_for stuff. These are difficult
// to use properly by the user
/// \cond GRAPHLAB_INTERNAL

/**
 * \internal
 * parallel_for_vertices will partition the set of vertices among the
 * vector of accfunctions. Each accfunction is then executed sequentially
 * on the set of vertices it was assigned.
 *
 * \param accfunction must be a void function which takes a single
 * vertex_type argument. It may be a functor and contain state.
 * The function need not be reentrant as it is only called sequentially
 */
template <typename VertexFunctorType>
void parallel_for_vertices(std::vector<VertexFunctorType>& accfunction) {
  ASSERT_TRUE(finalized);
  rpc.barrier();
  int numaccfunctions = (int)accfunction.size();
  ASSERT_GE(numaccfunctions, 1);
#ifdef _OPENMP
#pragma omp parallel for
#endif
  // Function i handles local vertices i, i+k, i+2k, ... (k = #functions),
  // so each function sees a disjoint strided slice of the vertices.
  for (int i = 0; i < (int)accfunction.size(); ++i) {
    for (int j = i;j < (int)local_graph.num_vertices(); j+=numaccfunctions) {
      if (lvid2record[j].owner == rpc.procid()) {
        accfunction[i](vertex_type(l_vertex(j)));
      }
    }
  }
  rpc.barrier();
}

/**
 * \internal
 * parallel_for_edges will partition the set of edges among the
 * vector of accfunctions.
Each accfunction is then executed sequentially
 * on the set of edges it was assigned.
 *
 * \param accfunction must be a void function which takes a single
 * edge_type argument. It may be a functor and contain state.
 * The function need not be reentrant as it is only called sequentially
 */
template <typename EdgeFunctorType>
void parallel_for_edges(std::vector<EdgeFunctorType>& accfunction) {
  ASSERT_TRUE(finalized);
  rpc.barrier();
  int numaccfunctions = (int)accfunction.size();
  ASSERT_GE(numaccfunctions, 1);
#ifdef _OPENMP
#pragma omp parallel for
#endif
  // Function i handles the in-edges of local vertices i, i+k, i+2k, ...
  // (k = #functions); every edge is an in-edge of exactly one vertex.
  for (int i = 0; i < (int)accfunction.size(); ++i) {
    for (int j = i;j < (int)local_graph.num_vertices(); j+=numaccfunctions) {
      foreach(const local_edge_type& e, l_vertex(j).in_edges()) {
        accfunction[i](edge_type(e));
      }
    }
  }
  rpc.barrier();
}

/** \brief Load the graph from an archive */
void load(iarchive& arc) {
  // read the vertices
  arc >> nverts
      >> nedges
      >> local_own_nverts
      >> nreplicas
      >> vid2lvid
      >> lvid2record
      >> local_graph;
  finalized = true;
  // check the graph condition
} // end of load

/** \brief Save the graph to an archive */
void save(oarchive& arc) const {
  if(!finalized) {
    logstream(LOG_FATAL)
      << "\n\tAttempting to save a graph before calling graph.finalize()."
      << std::endl;
  }
  // Write the number of edges and vertices
  arc << nverts
      << nedges
      << local_own_nverts
      << nreplicas
      << vid2lvid
      << lvid2record
      << local_graph;
} // end of save

/// \endcond

/// \brief Clears and resets the graph, releasing all memory used.
void clear () {
  foreach (vertex_record& vrec, lvid2record) vrec.clear();
  lvid2record.clear();
  vid2lvid.clear();
  local_graph.clear();
  finalized=false;
  nverts = nedges = local_own_nverts = nreplicas = 0;
}

/** \brief Load a distributed graph from a native binary format
 * previously saved with save_binary(). This function must be called
 * simultaneously on all machines.
 *
 * This function loads a sequence of files numbered
 * \li [prefix].0.gz
 * \li [prefix].1.gz
 * \li [prefix].2.gz
 * \li etc.
 *
 * These files must be previously saved using save_binary(), and
 * must be saved <b>using the same number of machines</b>.
 * This function uses the graphlab serialization system, so
 * the user must ensure that the vertex data and edge data
 * serialization formats have not changed since the graph was saved.
 *
 * A graph loaded using load_binary() is already finalized and
 * structure modifications are not permitted after loading.
 *
 * Return true on success and false on failure if the file cannot be loaded.
 */
bool load_binary(const std::string& prefix) {
  rpc.full_barrier();
  // Each machine loads its own gzip-compressed archive: [prefix]<procid>.bin
  std::string fname = prefix + tostr(rpc.procid()) + ".bin";
  logstream(LOG_INFO) << "Load graph from " << fname << std::endl;
  if(boost::starts_with(fname, "hdfs://")) {
    graphlab::hdfs hdfs;
    graphlab::hdfs::fstream in_file(hdfs, fname);
    boost::iostreams::filtering_stream<boost::iostreams::input> fin;
    fin.push(boost::iostreams::gzip_decompressor());
    fin.push(in_file);
    if(!fin.good()) {
      logstream(LOG_ERROR) << "\n\tError opening file: " << fname << std::endl;
      return false;
    }
    iarchive iarc(fin);
    iarc >> *this;   // deserializes via load(iarchive&); marks graph finalized
    fin.pop();       // detach the device ...
    fin.pop();       // ... then the gzip decompressor
    in_file.close();
  } else {
    std::ifstream in_file(fname.c_str(),
                          std::ios_base::in | std::ios_base::binary);
    if(!in_file.good()) {
      logstream(LOG_ERROR) << "\n\tError opening file: " << fname << std::endl;
      return false;
    }
    boost::iostreams::filtering_stream<boost::iostreams::input> fin;
    fin.push(boost::iostreams::gzip_decompressor());
    fin.push(in_file);
    iarchive iarc(fin);
    iarc >> *this;
    fin.pop();
    fin.pop();
    in_file.close();
  }
  logstream(LOG_INFO) << "Finish loading graph from " << fname << std::endl;
  rpc.full_barrier();
  return true;
} // end of load

/** \brief Saves a distributed graph to a native binary format
 * which can be loaded with load_binary(). This function must be called
 * simultaneously on all machines.
 *
 * This function saves one file per machine, named
 * \li [prefix]0.bin
 * \li [prefix]1.bin
 * \li [prefix]2.bin
 * \li etc.
 *
 * These files can be loaded with load_binary() using the <b> same number
 * of machines</b>.
 * This function uses the graphlab serialization system, so
 * the vertex data and edge data serialization formats must not
 * change between the use of save_binary() and load_binary().
 *
 * If the graph is not already finalized before save_binary() is called,
 * this function will finalize the graph.
 *
 * Returns true on success, and false if the graph cannot be saved to
 * the specified file.
 */
bool save_binary(const std::string& prefix) {
  rpc.full_barrier();
  finalize();   // the archive format requires a finalized graph
  timer savetime;
  savetime.start();
  std::string fname = prefix + tostr(rpc.procid()) + ".bin";
  logstream(LOG_INFO) << "Save graph to " << fname << std::endl;
  if(boost::starts_with(fname, "hdfs://")) {
    graphlab::hdfs hdfs;
    graphlab::hdfs::fstream out_file(hdfs, fname, true);
    boost::iostreams::filtering_stream<boost::iostreams::output> fout;
    fout.push(boost::iostreams::gzip_compressor());
    fout.push(out_file);
    if (!fout.good()) {
      logstream(LOG_ERROR) << "\n\tError opening file: " << fname << std::endl;
      return false;
    }
    oarchive oarc(fout);
    oarc << *this;   // serializes via save(oarchive&)
    fout.pop();      // flush/detach device ...
    fout.pop();      // ... then the gzip compressor
    out_file.close();
  } else {
    std::ofstream out_file(fname.c_str(),
                           std::ios_base::out | std::ios_base::binary);
    if (!out_file.good()) {
      logstream(LOG_ERROR) << "\n\tError opening file: " << fname << std::endl;
      return false;
    }
    boost::iostreams::filtering_stream<boost::iostreams::output> fout;
    fout.push(boost::iostreams::gzip_compressor());
    fout.push(out_file);
    oarchive oarc(fout);
    oarc << *this;
    fout.pop();
    fout.pop();
    out_file.close();
  }
  logstream(LOG_INFO) << "Finish saving graph to " << fname << std::endl
                      << "Finished saving binary graph: "
                      << savetime.current_time() << std::endl;
  rpc.full_barrier();
  return true;
} // end of save

/**
 * \brief Saves the graph to the filesystem using a provided Writer object.
 * Like \ref save(const std::string& prefix, writer writer, bool gzip, bool save_vertex, bool save_edge, size_t files_per_machine) "save()"
 * but only saves to local filesystem.
 */
template<typename Writer>
void save_to_posixfs(const std::string& prefix, Writer writer,
                     bool gzip = true,
                     bool save_vertex = true,
                     bool save_edge = true,
                     size_t files_per_machine = 4) {
  typedef boost::function<void(vertex_type)> vertex_function_type;
  typedef boost::function<void(edge_type)> edge_function_type;
  typedef std::ofstream base_fstream_type;
  typedef boost::iostreams::filtering_stream<boost::iostreams::output>
      boost_fstream_type;
  rpc.full_barrier();
  finalize();
  // figure out the filenames
  std::vector<std::string> graph_files;
  std::vector<base_fstream_type*> outstreams;
  std::vector<boost_fstream_type*> booststreams;
  graph_files.resize(files_per_machine);
  for(size_t i = 0; i < files_per_machine; ++i) {
    //graph_files[i] = prefix + "_" + tostr(1 + i + rpc.procid() * files_per_machine)
    // NOTE(review): this path names files "[prefix].N_of_M" while
    // save_to_hdfs() names them "[prefix]_N_of_M" -- confirm whether the
    // separator difference is intentional.
    graph_files[i] = prefix + "."
        + tostr(1 + i + rpc.procid() * files_per_machine)
        + "_of_" + tostr(rpc.numprocs() * files_per_machine);
    if (gzip) graph_files[i] += ".gz";
  }

  // create the vector of callbacks
  std::vector<vertex_function_type> vertex_callbacks(graph_files.size());
  std::vector<edge_function_type> edge_callbacks(graph_files.size());

  for(size_t i = 0; i < graph_files.size(); ++i) {
    logstream(LOG_INFO) << "Saving to file: " << graph_files[i] << std::endl;
    // open the stream
    base_fstream_type* out_file =
        new base_fstream_type(graph_files[i].c_str(),
                              std::ios_base::out | std::ios_base::binary);
    // attach gzip if the file is gzip
    boost_fstream_type* fout = new boost_fstream_type;
    // Using gzip filter
    if (gzip) fout->push(boost::iostreams::gzip_compressor());
    fout->push(*out_file);
    outstreams.push_back(out_file);
    booststreams.push_back(fout);
    // construct the callback for the parallel for; the stream and the
    // writer are bound by reference so all callbacks share one writer.
    typedef distributed_graph<vertex_data_type, edge_data_type> graph_type;
    vertex_callbacks[i] =
        boost::bind(&graph_type::template save_vertex_to_stream<boost_fstream_type, Writer>,
                    this, _1, boost::ref(*fout), boost::ref(writer));
    edge_callbacks[i] =
        boost::bind(&graph_type::template save_edge_to_stream<boost_fstream_type, Writer>,
                    this, _1, boost::ref(*fout), boost::ref(writer));
  }

  if (save_vertex) parallel_for_vertices(vertex_callbacks);
  if (save_edge) parallel_for_edges(edge_callbacks);

  // cleanup: pop once for the device, once more for the gzip filter,
  // then release the heap-allocated streams.
  for(size_t i = 0; i < graph_files.size(); ++i) {
    booststreams[i]->pop();
    if (gzip) booststreams[i]->pop();
    delete booststreams[i];
    delete outstreams[i];
  }
  vertex_callbacks.clear();
  edge_callbacks.clear();
  outstreams.clear();
  booststreams.clear();
  rpc.full_barrier();
} // end of save to posixfs

/**
 * \brief Saves the graph to HDFS using a provided Writer object.
 * Like \ref save(const std::string& prefix, writer writer, bool gzip, bool save_vertex, bool save_edge, size_t files_per_machine) "save()"
 * but only saves to HDFS.
 */
template<typename Writer>
void save_to_hdfs(const std::string& prefix, Writer writer,
                  bool gzip = true,
                  bool save_vertex = true,
                  bool save_edge = true,
                  size_t files_per_machine = 4) {
  typedef boost::function<void(vertex_type)> vertex_function_type;
  typedef boost::function<void(edge_type)> edge_function_type;
  typedef graphlab::hdfs::fstream base_fstream_type;
  typedef boost::iostreams::filtering_stream<boost::iostreams::output>
      boost_fstream_type;
  rpc.full_barrier();
  finalize();
  // figure out the filenames
  std::vector<std::string> graph_files;
  std::vector<base_fstream_type*> outstreams;
  std::vector<boost_fstream_type*> booststreams;
  graph_files.resize(files_per_machine);
  for(size_t i = 0; i < files_per_machine; ++i) {
    graph_files[i] = prefix + "_"
        + tostr(1 + i + rpc.procid() * files_per_machine)
        + "_of_" + tostr(rpc.numprocs() * files_per_machine);
    if (gzip) graph_files[i] += ".gz";
  }

  // HDFS support is optional at build time; abort if unavailable.
  if(!hdfs::has_hadoop()) {
    logstream(LOG_FATAL)
        << "\n\tAttempting to save a graph to HDFS but GraphLab"
        << "\n\twas built without HDFS."
        << std::endl;
  }
  hdfs& hdfs = hdfs::get_hdfs();

  // create the vector of callbacks
  std::vector<vertex_function_type> vertex_callbacks(graph_files.size());
  std::vector<edge_function_type> edge_callbacks(graph_files.size());
  for(size_t i = 0; i < graph_files.size(); ++i) {
    logstream(LOG_INFO) << "Saving to file: " << graph_files[i] << std::endl;
    // open the stream
    base_fstream_type* out_file =
        new base_fstream_type(hdfs, graph_files[i], true);
    // attach gzip if the file is gzip
    boost_fstream_type* fout = new boost_fstream_type;
    // Using gzip filter
    if (gzip) fout->push(boost::iostreams::gzip_compressor());
    fout->push(*out_file);
    outstreams.push_back(out_file);
    booststreams.push_back(fout);
    // construct the callback for the parallel for
    typedef distributed_graph<vertex_data_type, edge_data_type> graph_type;
    // NOTE(review): unlike save_to_posixfs(), the writer is copied into
    // each bound callback here (no boost::ref) -- harmless for stateless
    // writers, but confirm the asymmetry is intentional.
    vertex_callbacks[i] =
        boost::bind(&graph_type::template save_vertex_to_stream<boost_fstream_type, Writer>,
                    this, _1, boost::ref(*fout), writer);
    edge_callbacks[i] =
        boost::bind(&graph_type::template save_edge_to_stream<boost_fstream_type, Writer>,
                    this, _1, boost::ref(*fout), writer);
  }

  if (save_vertex) parallel_for_vertices(vertex_callbacks);
  if (save_edge) parallel_for_edges(edge_callbacks);

  // cleanup: pop once for the device, once more for the gzip filter,
  // then release the heap-allocated streams.
  for(size_t i = 0; i < graph_files.size(); ++i) {
    booststreams[i]->pop();
    if (gzip) booststreams[i]->pop();
    delete booststreams[i];
    delete outstreams[i];
  }
  vertex_callbacks.clear();
  edge_callbacks.clear();
  outstreams.clear();
  booststreams.clear();
  rpc.full_barrier();
} // end of save to hdfs

/**
 * \brief Saves the graph to the filesystem or to HDFS using
 * a user provided Writer object. This function should be called on
 * all machines simultaneously.
 *
 * This function saves the current graph to disk using a user provided
 * Writer object.
The writer object must implement two functions:
 * \code
 * std::string Writer::save_vertex(graph_type::vertex_type v);
 * std::string Writer::save_edge(graph_type::edge_type e);
 * \endcode
 *
 * The <code>save_vertex()</code> function will be called on each vertex
 * on the graph, and the output of the function is written to file.
 * Similarly, the <code>save_edge()</code> function is called on each edge
 * in the graph and the output written to file.
 *
 * For instance, a simple Writer object which saves a file containing
 * a list of edges will be:
 * \code
 * struct edge_list_writer {
 *   std::string save_vertex(vertex_type) { return ""; }
 *   std::string save_edge(edge_type e) {
 *     char c[128];
 *     sprintf(c, "%u\t%u\n", e.source().id(), e.target().id());
 *     return c;
 *   }
 * };
 * \endcode
 * The save_edge() function is called on each edge in the graph. It then
 * constructs a string containing "[source] \\t [target] \\n" and returns
 * the string.
 *
 * This can also be used to save data in human readable format. For
 * instance, if the vertex data type is a floating point number (say a
 * PageRank value), to save a list of vertices and their corresponding
 * PageRanks, the following writer could be implemented:
 * \code
 * struct pagerank_writer {
 *   std::string save_vertex(vertex_type v) {
 *     char c[128];
 *     sprintf(c, "%u\t%f\n", v.id(), v.data());
 *     return c;
 *   }
 *   std::string save_edge(edge_type) {}
 * };
 * \endcode
 * \note Note that this is not an example of a reliable writer since sprintf
 * may break if the size of vertex_id_type changes
 *
 * The output files will be written in
 * \li [prefix]_1_of_16.gz
 * \li [prefix]_2_of_16.gz
 * \li [prefix]_3_of_16.gz
 * \li etc.
 *
 * To accelerate the saving process, multiple files are written
 * per machine in parallel. If the gzip option is not set, the ".gz" suffix
 * is not added.
 *
 * For instance, if there are 4 machines, running:
 * \code
 *   save("test_graph", pagerank_writer);
 * \endcode
 * Will create the files
 * \li test_graph_1_of_16.gz
 * \li test_graph_2_of_16.gz
 * \li ...
 * \li test_graph_16_of_16.gz
 *
 * If HDFS support is compiled in, this function can save to HDFS by
 * adding "hdfs://" to the prefix.
 *
 * For instance, if there are 4 machines, running:
 * \code
 *   save("hdfs:///hdfs_server/data/test_graph", pagerank_writer);
 * \endcode
 * Will create on the HDFS server, the files
 * \li /data/test_graph_1_of_16.gz
 * \li /data/test_graph_2_of_16.gz
 * \li ...
 * \li /data/test_graph_16_of_16.gz
 *
 * \tparam Writer The writer object type. This is generally inferred by the
 *                compiler and need not be specified.
 *
 * \param prefix The file prefix to save the output graph files. The output
 *               files will be numbered [prefix].0 , [prefix].1 , etc.
 *               If prefix begins with "hdfs://", the output is written to
 *               HDFS
 * \param writer The writer object to use.
 * \param gzip If gzip compression should be used. If set, all files will be
 *             appended with the .gz suffix. Defaults to true.
 * \param save_vertex If vertices should be saved. Defaults to true.
 * \param save_edge If edges should be saved. Defaults to true.
 * \param files_per_machine Number of files to write simultaneously in
 *                          parallel per machine. Defaults to 4.
 */
template<typename Writer>
void save(const std::string& prefix, Writer writer,
          bool gzip = true,
          bool save_vertex = true,
          bool save_edge = true,
          size_t files_per_machine = 4) {
  // Dispatch on the prefix: "hdfs://..." goes to HDFS; anything else is
  // written to the local filesystem.
  if(boost::starts_with(prefix, "hdfs://")) {
    save_to_hdfs(prefix, writer, gzip, save_vertex, save_edge, files_per_machine);
  } else {
    save_to_posixfs(prefix, writer, gzip, save_vertex, save_edge, files_per_machine);
  }
} // end of save

/**
 * \brief Saves the graph in the specified format. This function should be
 * called on all machines simultaneously.
 *
 * The output files will be written in
 * \li [prefix].0.gz
 * \li [prefix].1.gz
 * \li [prefix].2.gz
 * \li etc.
 *
 * To accelerate the saving process, multiple files are written
 * per machine in parallel. If the gzip option is not set, the ".gz" suffix
 * is not added.
 *
 * For instance, if there are 4 machines, running:
 * \code
 *   save_format("test_graph", "tsv");
 * \endcode
 * Will create the files
 * \li test_graph_0.gz
 * \li test_graph_1.gz
 * \li ...
 * \li test_graph_15.gz
 *
 * The supported formats are described in \ref graph_formats.
 *
 * \param prefix The file prefix to save the output graph files. The output
 *               files will be numbered [prefix].0 , [prefix].1 , etc.
 *               If prefix begins with "hdfs://", the output is written to
 *               HDFS.
 * \param format The file format to save in.
 *               Either "tsv", "snap", "graphjrl", "bintsv4" or "bin".
 * \param gzip If gzip compression should be used. If set, all files will be
 *             appended with the .gz suffix. Defaults to true. Ignored
 *             if format == "bin".
 * \param files_per_machine Number of files to write simultaneously in
 *                          parallel per machine. Defaults to 4. Ignored if
 *                          format == "bin".
 */
void save_format(const std::string& prefix, const std::string& format,
                 bool gzip = true, size_t files_per_machine = 4) {
  if (format == "snap" || format == "tsv") {
    // snap and tsv share the same on-disk representation (edges only).
    save(prefix, builtin_parsers::tsv_writer<distributed_graph>(),
         gzip, false, true, files_per_machine);
  } else if (format == "graphjrl") {
    save(prefix, builtin_parsers::graphjrl_writer<distributed_graph>(),
         gzip, true, true, files_per_machine);
  } else if (format == "bin") {
    save_binary(prefix);
  } else if (format == "bintsv4") {
    save_direct(prefix, gzip, &graph_type::save_bintsv4_to_stream);
  } else {
    // LOG_FATAL terminates; the return is unreachable but kept for clarity.
    logstream(LOG_FATAL)
        << "Unrecognized Format \"" << format << "\"!" << std::endl;
    return;
  }
} // end of save structure

/**
 * \brief Load a graph from a collection of files stored on
 * the filesystem using the user defined line parser. Like
 * \ref load(const std::string& path, line_parser_type line_parser)
 * but only loads from the filesystem.
 */
void load_from_posixfs(std::string prefix, line_parser_type line_parser) {
  std::string directory_name;
  std::string original_path(prefix);
  boost::filesystem::path path(prefix);
  std::string search_prefix;
  if (boost::filesystem::is_directory(path)) {
    // The prefix names a directory: list everything inside it
    // (search_prefix stays empty, matching all files).
    directory_name = path.native();
  } else {
    // Otherwise split into parent directory + filename prefix;
    // an empty parent means the current directory.
    directory_name = path.parent_path().native();
    search_prefix = path.filename().native();
    directory_name = (directory_name.empty() ? "." : directory_name);
  }
  std::vector<std::string> graph_files;
  fs_util::list_files_with_prefix(directory_name, search_prefix, graph_files);
  if (graph_files.size() == 0) {
    logstream(LOG_WARNING) << "No files found matching " << original_path << std::endl;
  }
  // Files are striped across machines (i mod numprocs) when parallel
  // ingress is enabled; otherwise machine 0 loads everything.
  // NOTE(review): the unsigned (size_t) loop index requires OpenMP 3.0+.
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for(size_t i = 0; i < graph_files.size(); ++i) {
    if ((parallel_ingress && (i % rpc.numprocs() == rpc.procid())) ||
        (!parallel_ingress && (rpc.procid() == 0))) {
      logstream(LOG_EMPH) << "Loading graph from file: " << graph_files[i] << std::endl;
      // is it a gzip file ?
      const bool gzip = boost::ends_with(graph_files[i], ".gz");
      // open the stream
      std::ifstream in_file(graph_files[i].c_str(),
                            std::ios_base::in | std::ios_base::binary);
      // attach gzip if the file is gzip
      boost::iostreams::filtering_stream<boost::iostreams::input> fin;
      // Using gzip filter
      if (gzip) fin.push(boost::iostreams::gzip_decompressor());
      fin.push(in_file);
      const bool success = load_from_stream(graph_files[i], fin, line_parser);
      if(!success) {
        logstream(LOG_FATAL)
            << "\n\tError parsing file: " << graph_files[i] << std::endl;
      }
      fin.pop();
      if (gzip) fin.pop();
    }
  }
  rpc.full_barrier();
} // end of load from posixfs

/**
 * \brief Load a graph from a collection of files stored on
 * HDFS using the user defined line parser. Like
 * \ref load(const std::string& path, line_parser_type line_parser)
 * but only loads from HDFS.
 */
void load_from_hdfs(std::string prefix, line_parser_type line_parser) {
  // force a "/" at the end of the path
  // make sure to check that the path is non-empty. (you do not
  // want to make the empty path "" the root path "/" )
  std::string path = prefix;
  if (path.length() > 0 && path[path.length() - 1] != '/') path = path + "/";
  if(!hdfs::has_hadoop()) {
    logstream(LOG_FATAL)
        << "\n\tAttempting to load a graph from HDFS but GraphLab"
        << "\n\twas built without HDFS."
        << std::endl;
  }
  hdfs& hdfs = hdfs::get_hdfs();
  std::vector<std::string> graph_files;
  graph_files = hdfs.list_files(path);
  if (graph_files.size() == 0) {
    logstream(LOG_WARNING) << "No files found matching " << prefix << std::endl;
  }
  // Same striping scheme as load_from_posixfs().
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for(size_t i = 0; i < graph_files.size(); ++i) {
    if ((parallel_ingress && (i % rpc.numprocs() == rpc.procid())) ||
        (!parallel_ingress && (rpc.procid() == 0))) {
      logstream(LOG_EMPH) << "Loading graph from file: " << graph_files[i] << std::endl;
      // is it a gzip file ?
      const bool gzip = boost::ends_with(graph_files[i], ".gz");
      // open the stream
      graphlab::hdfs::fstream in_file(hdfs, graph_files[i]);
      boost::iostreams::filtering_stream<boost::iostreams::input> fin;
      if(gzip) fin.push(boost::iostreams::gzip_decompressor());
      fin.push(in_file);
      const bool success = load_from_stream(graph_files[i], fin, line_parser);
      if(!success) {
        logstream(LOG_FATAL)
            << "\n\tError parsing file: " << graph_files[i] << std::endl;
      }
      fin.pop();
      if (gzip) fin.pop();
    }
  }
  rpc.full_barrier();
} // end of load from hdfs

/**
 * \brief Load a graph from a given path using a user defined
 * line parser. This function should be called on all machines
 * simultaneously.
 *
 * This function loads all files in the filesystem or on HDFS matching
 * the pattern "[prefix]*".
 *
 * Examples:
 *
 * <b> prefix = "webgraph.txt" </b>
 *
 * will load the file webgraph.txt if such a file exists. It will also
 * load all files in the current directory which begin with "webgraph.txt".
 * For instance, webgraph.txt.0, webgraph.txt.1, etc.
 *
 * <b>prefix = "graph/data"</b>
 *
 * will load all files in the "graph" directory which begin with "data"
 *
 * <b> prefix = "hdfs:///hdfs_server/graph/data" </b>
 *
 * will load all files from the HDFS server in the "/graph/" directory
 * which begin with "data".
 *
 * If files have the ".gz" suffix, they are automatically decompressed.
 *
 * The line_parser is a user defined function matching the following
 * prototype:
 *
 * \code
 * bool parser(graph_type& graph,
 *             const std::string& filename,
 *             const std::string& line);
 * \endcode
 *
 * The load() function will call the parser one line at a time, and the
 * parser function should process the line and call add_vertex / add_edge
 * functions in the graph. It should return true on success, and false
 * on failure. Since the parsing may be parallelized,
 * the parser should treat each line independently
 * and not depend on a sequential pass through a file.
 *
 * For instance, if the graph is in a simple edge list format, a parser
 * could be:
 * \code
 * bool edge_list_parser(graph_type& graph,
 *                       const std::string& filename,
 *                       const std::string& line) {
 *   if (line.empty()) return true;
 *   vertex_id_type source, target;
 *   if (sscanf(line.c_str(), "%u %u", source, target) < 2) {
 *     // parsed less than 2 objects, failure.
 *     return false;
 *   }
 *   else {
 *     graph.add_edge(source, target);
 *     return true;
 *   }
 * }
 * \endcode
 * \note Note that this is not an example of a reliable parser since sscanf
 * may break if the size of vertex_id_type changes
 *
 * \param prefix The file prefix to read from. All files matching
 *               the pattern "[prefix]*" are loaded. If prefix begins with
 *               "hdfs://" the files are read from hdfs.
 * \param line_parser A user defined parsing function
 */
void load(std::string prefix, line_parser_type line_parser) {
  rpc.full_barrier();
  // NOTE(review): an empty prefix returns after the first barrier only;
  // this assumes all machines are passed the same prefix, otherwise the
  // barriers would mismatch -- confirm callers guarantee this.
  if (prefix.length() == 0) return;
  if(boost::starts_with(prefix, "hdfs://")) {
    load_from_hdfs(prefix, line_parser);
  } else {
    load_from_posixfs(prefix, line_parser);
  }
  rpc.full_barrier();
} // end of load

/**
 * \brief Constructs a synthetic power law graph. Must be called on
 * all machines simultaneously.
 *
 * This function constructs a synthetic out-degree power law of "nverts"
 * vertices with a particular alpha parameter.
 * In other words, the probability that a vertex has out-degree \f$d\f$,
 * is given by:
 *
 * \f[ P(d) \propto d^{-\alpha} \f]
 *
 * By default, the out-degree distribution of each vertex
 * will have power-law distribution, but the in-degrees will be nearly
 * uniform. This can be reversed by setting the second argument "in_degree"
 * to true.
 *
 * \param nverts Number of vertices to generate
 * \param in_degree If set to true, the graph will have power-law in-degree.
 *                  Defaults to false.
 * \param alpha The alpha parameter in the power law distribution. Defaults
 *              to 2.1
 * \param truncate Limits the maximum degree of any vertex. (thus generating
 *                 a truncated power-law distribution). Necessary
 *                 for large number of vertices (hundreds of millions)
 *                 since this function allocates a PDF vector of
 *                 "nverts" to sample from.
 */
void load_synthetic_powerlaw(size_t nverts, bool in_degree = false,
                             double alpha = 2.1,
                             size_t truncate = (size_t)(-1)) {
  rpc.full_barrier();
  // Build the (possibly truncated) degree PDF: P(d) ~ d^-alpha ...
  std::vector<double> prob(std::min(nverts, truncate), 0);
  logstream(LOG_INFO) << "constructing pdf" << std::endl;
  for(size_t i = 0; i < prob.size(); ++i)
    prob[i] = std::pow(double(i+1), -alpha);
  logstream(LOG_INFO) << "constructing cdf" << std::endl;
  // ... then convert it in place into a CDF for multinomial sampling.
  random::pdf2cdf(prob);
  logstream(LOG_INFO) << "Building graph" << std::endl;
  size_t target_index = rpc.procid();
  size_t addedvtx = 0;

  // A large prime number
  const size_t HASH_OFFSET = 2654435761;
  // Source vertices are striped across machines; each machine generates
  // edges only for the sources it owns here.
  for(size_t source = rpc.procid(); source < nverts;
      source += rpc.numprocs()) {
    const size_t out_degree = random::multinomial_cdf(prob) + 1;
    for(size_t i = 0; i < out_degree; ++i) {
      // Step by a fixed prime (mod nverts) to scatter targets across the
      // vertex range; re-step to skip self-edges.
      target_index = (target_index + HASH_OFFSET) % nverts;
      while (source == target_index) {
        target_index = (target_index + HASH_OFFSET) % nverts;
      }
      if(in_degree) add_edge(target_index, source);
      else add_edge(source, target_index);
    }
    ++addedvtx;
    if (addedvtx % 10000000 == 0) {
      logstream(LOG_EMPH) << addedvtx << " inserted\n";
    }
  }
  rpc.full_barrier();
} // end of load random powerlaw

/**
 * \brief load a graph with a standard format. Must be called on all
 * machines simultaneously.
 *
 * The supported graph formats are described in \ref graph_formats.
 */
void load_format(const std::string& path, const std::string& format) {
  line_parser_type line_parser;
  if (format == "snap") {
    line_parser = builtin_parsers::snap_parser<distributed_graph>;
    load(path, line_parser);
  } else if (format == "adj") {
    line_parser = builtin_parsers::adj_parser<distributed_graph>;
    load(path, line_parser);
  } else if (format == "tsv") {
    line_parser = builtin_parsers::tsv_parser<distributed_graph>;
    load(path, line_parser);
  } else if (format == "csv") {
    line_parser = builtin_parsers::csv_parser<distributed_graph>;
    load(path, line_parser);
  } else if (format == "graphjrl") {
    line_parser = builtin_parsers::graphjrl_parser<distributed_graph>;
    load(path, line_parser);
  } else if (format == "bintsv4") {
    load_direct(path,&graph_type::load_bintsv4_from_stream);
  } else if (format == "bin") {
    load_binary(path);
  } else {
    // NOTE(review): unlike save_format() this logs LOG_ERROR (non-fatal)
    // and silently loads nothing -- confirm the asymmetry is intentional.
    logstream(LOG_ERROR)
        << "Unrecognized Format \"" << format << "\"!" << std::endl;
    return;
  }
} // end of load

/****************************************************************************
 *                       Vertex Set Functions                               *
 *                       ----------------------                             *
 * Manages operations involving sets of vertices                            *
 ****************************************************************************/

/**
 * \brief Returns an empty set of vertices
 */
static vertex_set empty_set() { return vertex_set(false); }

/**
 * \brief Returns a full set of vertices
 */
static vertex_set complete_set() { return vertex_set(true); }

/// Returns the set of vertices adjacent to \c cur along direction \c edir.
vertex_set neighbors(const vertex_set& cur, edge_dir_type edir) {
  // foreach master bit which is set, set its corresponding mirror
  // synchronize master to mirrors
  vertex_set ret(empty_set());
  ret.make_explicit(*this);

  foreach(size_t lvid, cur.get_lvid_bitset(*this)) {
    if (edir == IN_EDGES || edir == ALL_EDGES) {
      foreach(local_edge_type e, l_vertex(lvid).in_edges()) {
        ret.set_lvid_unsync(e.source().id());
      }
    }
    if (edir == OUT_EDGES || edir == ALL_EDGES) {
      foreach(local_edge_type e, l_vertex(lvid).out_edges()) {
        ret.set_lvid_unsync(e.target().id());
      }
    }
  }
  // Locally set bits may live on mirrors; OR them back into the masters,
  // then broadcast the merged result from masters to all mirrors.
  ret.synchronize_mirrors_to_master_or(*this, vset_exchange);
  ret.synchronize_master_to_mirrors(*this, vset_exchange);
  return ret;
}

/**
 * \brief Constructs a vertex set from a predicate operation which
 * is executed on each vertex.
 *
 * This function selects a subset of vertices on which the predicate
 * evaluates to true. For instance if vertices contain an integer, the
 * following code will construct a set of vertices containing only vertices
 * with data which are a multiple of 2.
 *
 * \code
 * bool is_multiple_of_2(const graph_type::vertex_type& vertex) {
 *   return vertex.data() % 2 == 0;
 * }
 * vertex_set even_vertices = graph.select(is_multiple_of_2);
 * \endcode
 *
 * select() also takes a second argument which restricts the set of vertices
 * queried. For instance,
 * \code
 * bool is_multiple_of_3(const graph_type::vertex_type& vertex) {
 *   return vertex.data() % 3 == 0;
 * }
 * vertex_set div_6_vertices = graph.select(is_multiple_of_3, even_vertices);
 * \endcode
 * will select from the set of even vertices, all vertices which are also
 * divisible by 3. The resultant set is therefore the set of all vertices
 * which are divisible by 6.
 *
 * \param select_functor A function/functor which takes a
 *                       const vertex_type& argument and returns a boolean
 *                       denoting of the vertex is to be included in the
 *                       returned set
 * \param vset Optional. The set of vertices to evaluate the selection on.
* Defaults to complete_set() */ template <typename FunctionType> vertex_set select(FunctionType select_functor, const vertex_set& vset = complete_set()) { vertex_set ret(empty_set()); ret.make_explicit(*this); #ifdef _OPENMP #pragma omp for #endif for (int i = 0; i < (int)local_graph.num_vertices(); ++i) { if (lvid2record[i].owner == rpc.procid() && vset.l_contains((lvid_type)i)) { const vertex_type vtx(l_vertex(i)); if (select_functor(vtx)) ret.set_lvid(i); } } ret.synchronize_master_to_mirrors(*this, vset_exchange); return ret; } void sync_vertex_set_master_to_mirrors(vertex_set& vset) { vset.synchronize_master_to_mirrors(*this, vset_exchange); } /** * \brief Returns the number of vertices in a vertex set. * * This function must be called on all machines and returns the number of * vertices contained in the vertex set. * * For instance: * \code * graph.vertex_set_size(graph.complete_set()); * \endcode * will always evaluate to graph.num_vertices(); */ size_t vertex_set_size(const vertex_set& vset) { size_t count = 0; for (int i = 0; i < (int)local_graph.num_vertices(); ++i) { count += (lvid2record[i].owner == rpc.procid() && vset.l_contains((lvid_type)i)); } rpc.all_reduce(count); return count; } /** * \brief Returns true if the vertex set is empty * * This function must be called on all machines and returns * true if the vertex set is empty */ bool vertex_set_empty(const vertex_set& vset) { if (vset.lazy) return !vset.is_complete_set; size_t count = vset.get_lvid_bitset(*this).empty(); rpc.all_reduce(count); return count == rpc.numprocs(); } /**************************************************************************** * Internal Functions * * ---------------------- * * These functions functions and types provide internal access to the * * underlying graph representation. They should not be used unless you * * *really* know what you are doing. 
* ****************************************************************************/ /** * \internal * The vertex record stores information associated with each * vertex on this proc */ struct vertex_record { /// The official owning processor for this vertex procid_t owner; /// The local vid of this vertex on this proc vertex_id_type gvid; /// The number of in edges vertex_id_type num_in_edges, num_out_edges; /** The set of proc that mirror this vertex. The owner should NOT be in this set.*/ mirror_type _mirrors; vertex_record() : owner(-1), gvid(-1), num_in_edges(0), num_out_edges(0) { } vertex_record(const vertex_id_type& vid) : owner(-1), gvid(vid), num_in_edges(0), num_out_edges(0) { } procid_t get_owner () const { return owner; } const mirror_type& mirrors() const { return _mirrors; } size_t num_mirrors() const { return _mirrors.popcount(); } void clear() { _mirrors.clear(); } void load(iarchive& arc) { clear(); arc >> owner >> gvid >> num_in_edges >> num_out_edges >> _mirrors; } void save(oarchive& arc) const { arc << owner << gvid << num_in_edges << num_out_edges << _mirrors; } // end of save bool operator==(const vertex_record& other) const { return ( (owner == other.owner) && (gvid == other.gvid) && (num_in_edges == other.num_in_edges) && (num_out_edges == other.num_out_edges) && (_mirrors == other._mirrors) ); } }; // end of vertex_record /** \internal * \brief converts a local vertex ID to a local vertex object */ local_vertex_type l_vertex(lvid_type vid) { return local_vertex_type(*this, vid); } /** \internal *\brief Get the Total number of vertex replicas in the graph */ size_t num_replicas() const { return nreplicas; } /** \internal *\brief Get the number of vertices local to this proc */ size_t num_local_vertices() const { return local_graph.num_vertices(); } /** \internal *\brief Get the number of edges local to this proc */ size_t num_local_edges() const { return local_graph.num_edges(); } /** \internal *\brief Get the number of vertices owned by this 
proc */ size_t num_local_own_vertices() const { return local_own_nverts; } /** \internal *\brief Convert a global vid to a local vid */ lvid_type local_vid (const vertex_id_type vid) const { // typename boost::unordered_map<vertex_id_type, lvid_type>:: // const_iterator iter = vid2lvid.find(vid); typename hopscotch_map_type::const_iterator iter = vid2lvid.find(vid); return iter->second; } // end of local_vertex_id /** \internal *\brief Convert a local vid to a global vid */ vertex_id_type global_vid(const lvid_type lvid) const { ASSERT_LT(lvid, lvid2record.size()); return lvid2record[lvid].gvid; } // end of global_vertex_id /** \internal * \brief Returns true if the local graph as an instance of (master or mirror) * of the vertex ID. */ bool contains_vertex(const vertex_id_type vid) const { return vid2lvid.find(vid) != vid2lvid.end(); } /** * \internal * \brief Returns an edge list of all in edges of a local vertex ID * on the local graph * * Equivalent to l_vertex(lvid).in_edges() */ local_edge_list_type l_in_edges(const lvid_type lvid) { return local_edge_list_type(*this, local_graph.in_edges(lvid)); } /** * \internal * \brief Returns the number of in edges of a local vertex ID * on the local graph * * Equivalent to l_vertex(lvid).num in_edges() */ size_t l_num_in_edges(const lvid_type lvid) const { return local_graph.num_in_edges(lvid); } /** * \internal * \brief Returns an edge list of all out edges of a local vertex ID * on the local graph * * Equivalent to l_vertex(lvid).out_edges() */ local_edge_list_type l_out_edges(const lvid_type lvid) { return local_edge_list_type(*this, local_graph.out_edges(lvid)); } /** * \internal * \brief Returns the number of out edges of a local vertex ID * on the local graph * * Equivalent to l_vertex(lvid).num out_edges() */ size_t l_num_out_edges(const lvid_type lvid) const { return local_graph.num_out_edges(lvid); } procid_t procid() const { return rpc.procid(); } procid_t numprocs() const { return rpc.numprocs(); } 
distributed_control& dc() { return rpc.dc(); }

      /** \internal
       * \brief Returns the internal vertex record of a given global vertex ID.
       * The vertex must be present on this machine (asserts otherwise).
       */
      const vertex_record& get_vertex_record(vertex_id_type vid) const {
        // typename boost::unordered_map<vertex_id_type, lvid_type>::
        //   const_iterator iter = vid2lvid.find(vid);
        typename hopscotch_map_type::const_iterator iter = vid2lvid.find(vid);
        ASSERT_TRUE(iter != vid2lvid.end());
        return lvid2record[iter->second];
      }

      /** \internal
       * \brief Returns the internal vertex record of a given local vertex ID
       */
      vertex_record& l_get_vertex_record(lvid_type lvid) {
        ASSERT_LT(lvid, lvid2record.size());
        return lvid2record[lvid];
      }

      /** \internal
       * \brief Returns the internal vertex record of a given local vertex ID
       */
      const vertex_record& l_get_vertex_record(lvid_type lvid) const {
        ASSERT_LT(lvid, lvid2record.size());
        return lvid2record[lvid];
      }

      /** \internal
       * \brief Returns true if the provided global vertex ID is a
       *        master vertex on this machine and false otherwise.
       * Ownership is determined purely by hashing the vid over numprocs;
       * the vertex need not be present locally.
       */
      bool is_master(vertex_id_type vid) const {
        const procid_t owning_proc =
          graph_hash::hash_vertex(vid) % rpc.numprocs();
        return (owning_proc == rpc.procid());
      }

      /** \internal
       * \brief Returns the procid that masters the given global vertex ID
       * (hash of the vid modulo numprocs).
       */
      procid_t master(vertex_id_type vid) const {
        const procid_t owning_proc =
          graph_hash::hash_vertex(vid) % rpc.numprocs();
        return owning_proc;
      }

      /** \internal
       * \brief Returns true if the provided local vertex ID is a master vertex.
       *        Returns false otherwise.
       */
      bool l_is_master(lvid_type lvid) const {
        ASSERT_LT(lvid, lvid2record.size());
        return lvid2record[lvid].owner == rpc.procid();
      }

      /** \internal
       * \brief Returns the master procid for vertex lvid.
       */
      procid_t l_master(lvid_type lvid) const {
        ASSERT_LT(lvid, lvid2record.size());
        return lvid2record[lvid].owner;
      }

      /** \internal
       *  \brief Returns a reference to the internal graph representation
       */
      local_graph_type& get_local_graph() {
        return local_graph;
      }

      /** \internal
       *  \brief Returns a const reference to the internal graph representation
       */
      const local_graph_type& get_local_graph() const {
        return local_graph;
      }

      /** \internal
       * This function synchronizes the master vertex data with all the
       * mirrors, restricted to the vertices in \c vset.
       * This function must be called simultaneously by all machines.
       */
      void synchronize(const vertex_set& vset = complete_set()) {
        typedef std::pair<vertex_id_type, vertex_data_type> pair_type;

        procid_t sending_proc;
        // Loop over all the local vertex records
#ifdef _OPENMP
#pragma omp parallel for
#endif
        for(lvid_type lvid = 0; lvid < lvid2record.size(); ++lvid) {
          // per-iteration (hence per-thread under OpenMP) receive buffer
          typename buffered_exchange<pair_type>::buffer_type recv_buffer;
          const vertex_record& record = lvid2record[lvid];
          // if this machine is the owner of a record then send the
          // vertex data to all mirrors
          if(record.owner == rpc.procid() && vset.l_contains(lvid)) {
            foreach(size_t proc, record.mirrors()) {
              const pair_type pair(record.gvid, local_graph.vertex_data(lvid));
#ifdef _OPENMP
              vertex_exchange.send(proc, pair, omp_get_thread_num());
#else
              vertex_exchange.send(proc, pair);
#endif
            }
          }
          // Receive any vertex data that has already arrived and update
          // local mirrors (non-blocking recv: last argument is true)
          while(vertex_exchange.recv(sending_proc, recv_buffer, true)) {
            foreach(const pair_type& pair, recv_buffer) {
              vertex(pair.first).data() = pair.second;
            }
            recv_buffer.clear();
          }
        }
        // flush the exchange and drain all remaining incoming updates
        typename buffered_exchange<pair_type>::buffer_type recv_buffer;
        vertex_exchange.flush();
        while(vertex_exchange.recv(sending_proc, recv_buffer)) {
          foreach(const pair_type& pair, recv_buffer) {
            vertex(pair.first).data() = pair.second;
          }
          recv_buffer.clear();
        }
        ASSERT_TRUE(vertex_exchange.empty());
      } // end of synchronize

      /** \internal
       * vertex type which provides access to local graph vertices.
       */
      struct local_vertex_type {
        distributed_graph& graph_ref;
        lvid_type lvid;

        local_vertex_type(distributed_graph& graph_ref, lvid_type lvid):
              graph_ref(graph_ref), lvid(lvid) { }

        /// \brief Can be casted from vertex_type using an explicit cast
        explicit local_vertex_type(vertex_type v)
            :graph_ref(v.graph_ref),lvid(v.lvid) { }
        /// \brief Can be casted to vertex_type using an explicit cast
        operator vertex_type() const {
          return vertex_type(graph_ref, lvid);
        }

        bool operator==(local_vertex_type& v) const {
          return lvid == v.lvid;
        }

        /// \brief Returns a const reference to the data on the local vertex
        const vertex_data_type& data() const {
          return graph_ref.get_local_graph().vertex_data(lvid);
        }

        /// \brief Returns a reference to the data on the local vertex
        vertex_data_type& data() {
          return graph_ref.get_local_graph().vertex_data(lvid);
        }

        /** \brief Returns the number of in edges on the
         *         local graph of this local vertex
         */
        size_t num_in_edges() const {
          return graph_ref.get_local_graph().num_in_edges(lvid);
        }

        /** \brief Returns the number of out edges on the
         *         local graph of this local vertex
         */
        size_t num_out_edges() const {
          return graph_ref.get_local_graph().num_out_edges(lvid);
        }

        /// \brief Returns the local ID of this local vertex
        lvid_type id() const {
          return lvid;
        }

        /// \brief Returns the global ID of this local vertex
        vertex_id_type global_id() const {
          return graph_ref.global_vid(lvid);
        }

        /** \brief Returns a list of all in edges on the
         *         local graph of this local vertex
         */
        local_edge_list_type in_edges() {
          return graph_ref.l_in_edges(lvid);
        }

        /** \brief Returns a list of all out edges on the
         *         local graph of this local vertex
         */
        local_edge_list_type out_edges() {
          return graph_ref.l_out_edges(lvid);
        }

        /** \brief Returns the owner of this local vertex
         */
        procid_t owner() const {
          return graph_ref.l_get_vertex_record(lvid).owner;
        }

        /** \brief Returns true if this machine owns this local vertex
         */
        bool owned() const {
          return graph_ref.l_get_vertex_record(lvid).owner == graph_ref.procid();
        }

        /** \brief Returns the number of in_edges of this vertex
         *         on the global graph
         */
        size_t global_num_in_edges() const {
          return graph_ref.l_get_vertex_record(lvid).num_in_edges;
        }

        /** \brief Returns the number of out_edges of this vertex
         *         on the global graph
         */
        size_t global_num_out_edges() const {
          return graph_ref.l_get_vertex_record(lvid).num_out_edges;
        }

        /** \brief Returns the set of mirrors of this vertex
         */
        const mirror_type& mirrors() const {
          return graph_ref.l_get_vertex_record(lvid)._mirrors;
        }

        size_t num_mirrors() const {
          return graph_ref.l_get_vertex_record(lvid).num_mirrors();
        }

        /** \brief Returns the vertex record of this local vertex
         */
        vertex_record& get_vertex_record() {
          return graph_ref.l_get_vertex_record(lvid);
        }
      };

      /** \internal
       * edge type which provides access to local graph edges */
      class local_edge_type {
      private:
        distributed_graph& graph_ref;
        typename local_graph_type::edge_type e;
      public:
        local_edge_type(distributed_graph& graph_ref,
                        typename local_graph_type::edge_type e):
                                            graph_ref(graph_ref), e(e) { }

        /// \brief Can be converted from edge_type via an explicit cast
        explicit local_edge_type(edge_type ge)
            :graph_ref(ge.graph_ref),e(ge.e) { }

        /// \brief Can be casted to edge_type using an explicit cast
        operator edge_type() const {
          return edge_type(graph_ref, e);
        }

        /// \brief Returns the source local vertex of the edge
        local_vertex_type source() const {
          return local_vertex_type(graph_ref, e.source().id());
        }

        /// \brief Returns the target local vertex of the edge
        local_vertex_type target() const {
          return local_vertex_type(graph_ref, e.target().id());
        }

        /// \brief Returns a constant reference to the data on the edge
        const edge_data_type& data() const { return e.data(); }

        /// \brief Returns a reference to the data on the edge
        edge_data_type& data() { return e.data(); }

        /// \brief Returns the internal ID of this edge
        edge_id_type id() const { return e.id(); }
      };

      /** \internal
       * \brief A functor which converts
       * local_graph_type::edge_type to local_edge_type
       */
      struct make_local_edge_type_functor {
        typedef typename local_graph_type::edge_type argument_type;
        typedef local_edge_type result_type;
        distributed_graph& graph_ref;
        make_local_edge_type_functor(distributed_graph& graph_ref):
                                                  graph_ref(graph_ref) { }
        result_type operator() (const argument_type et) const {
          return local_edge_type(graph_ref, et);
        }
      };

      /** \internal
       * \brief A list of edges. Used by l_in_edges() and l_out_edges().
       * Lazily wraps each local_graph edge into a local_edge_type via
       * a transform iterator.
       */
      struct local_edge_list_type {
        make_local_edge_type_functor me_functor;
        typename local_graph_type::edge_list_type elist;
        typedef boost::transform_iterator<make_local_edge_type_functor,
                typename local_graph_type::edge_list_type::iterator> iterator;
        typedef iterator const_iterator;

        local_edge_list_type(distributed_graph& graph_ref,
                             typename local_graph_type::edge_list_type elist) :
                            me_functor(graph_ref), elist(elist) { }

        /// \brief Returns the number of edges in the list
        size_t size() const { return elist.size(); }

        /// \brief Random access to the list elements
        local_edge_type operator[](size_t i) const { return me_functor(elist[i]); }

        /** \brief Returns an iterator to the beginning of the list.
         *
         * Returns an iterator to the beginning of the list. \see end()
         * The iterator_type is local_edge_list_type::iterator.
         *
         * \code
         * local_edge_list_type::iterator iter = elist.begin();
         * while(iter != elist.end()) {
         *   ... [do stuff] ...
         *   ++iter;
         * }
         * \endcode
         */
        iterator begin() const { return
            boost::make_transform_iterator(elist.begin(), me_functor); }

        /** \brief Returns an iterator to the end of the list.
         *
         * Returns an iterator to the end of the list. \see begin()
         * The iterator_type is local_edge_list_type::iterator.
         */
        iterator end() const { return
            boost::make_transform_iterator(elist.end(), me_functor); }

        /// \brief Returns true if the list is empty
        bool empty() const { return elist.empty(); }
      };

    private:
      // PRIVATE DATA MEMBERS ===================================================>
      /** The rpc interface for this class */
      mutable dc_dist_object<distributed_graph> rpc;

    public:
      // For the warp engine to find the remote instances of this class
      size_t get_rpc_obj_id() { return rpc.get_obj_id(); }

    private:
      // set once the graph structure is finalized
      bool finalized;

      /** The local graph data */
      local_graph_type local_graph;

      /** The map from local vertex ids to vertex records */
      std::vector<vertex_record> lvid2record;

      // boost::unordered_map<vertex_id_type, lvid_type> vid2lvid;
      /** The map from global vertex ids back to local vertex ids */
      typedef hopscotch_map<vertex_id_type, lvid_type> hopscotch_map_type;
      typedef hopscotch_map_type vid2lvid_map_type;
      hopscotch_map_type vid2lvid;

      /** The global number of vertices and edges */
      size_t nverts, nedges;

      /** The number of vertices owned by this proc */
      size_t local_own_nverts;

      /** The global number of vertex replica */
      size_t nreplicas;

      /** pointer to the distributed ingress object*/
      distributed_ingress_base<VertexData, EdgeData>* ingress_ptr;

      /** Buffered Exchange used by synchronize() */
      buffered_exchange<std::pair<vertex_id_type, vertex_data_type> > vertex_exchange;

      /** Buffered Exchange used by vertex sets */
      buffered_exchange<vertex_id_type> vset_exchange;

      /** Command option to disable parallel ingress.
          Used for simulating single node ingress */
      bool parallel_ingress;

      lock_manager_type lock_manager;

      /** \internal
       * Selects and constructs the ingress (partitioning) object.
       * Recognized methods: "oblivious", "hdrf", "random", "grid", "pds";
       * anything else auto-selects pds, then grid, then oblivious, based on
       * the number of processes.
       */
      void set_ingress_method(const std::string& method,
          size_t bufsize = 50000, bool usehash = false, bool userecent = false) {
        if(ingress_ptr != NULL) { delete ingress_ptr; ingress_ptr = NULL; }
        if (method == "oblivious") {
          if (rpc.procid() == 0)
            logstream(LOG_EMPH) << "Use oblivious ingress, usehash: " << usehash
                                << ", userecent: " << userecent << std::endl;
          ingress_ptr = new distributed_oblivious_ingress<VertexData, EdgeData>(rpc.dc(), *this, usehash, userecent);
        } else if  (method == "hdrf") {
          if (rpc.procid() == 0)
            logstream(LOG_EMPH) << "Use hdrf oblivious ingress, usehash: " << usehash
                                << ", userecent: " << userecent << std::endl;
          ingress_ptr = new distributed_hdrf_ingress<VertexData, EdgeData>(rpc.dc(), *this, usehash, userecent);
        } else if (method == "random") {
          if (rpc.procid() == 0) logstream(LOG_EMPH) << "Use random ingress" << std::endl;
          ingress_ptr = new distributed_random_ingress<VertexData, EdgeData>(rpc.dc(), *this);
        } else if (method == "grid") {
          if (rpc.procid() == 0) logstream(LOG_EMPH) << "Use grid ingress" << std::endl;
          ingress_ptr = new distributed_constrained_random_ingress<VertexData, EdgeData>(rpc.dc(), *this, "grid");
        } else if (method == "pds") {
          if (rpc.procid() == 0) logstream(LOG_EMPH) << "Use pds ingress" << std::endl;
          ingress_ptr = new distributed_constrained_random_ingress<VertexData, EdgeData>(rpc.dc(), *this, "pds");
        } else {
          // use default ingress method if none is specified
          std::string ingress_auto="";
          size_t num_shards = rpc.numprocs();
          int nrow, ncol, p;
          if (sharding_constraint::is_pds_compatible(num_shards, p)) {
            ingress_auto="pds";
            ingress_ptr = new distributed_constrained_random_ingress<VertexData, EdgeData>(rpc.dc(), *this, "pds");
          } else if (sharding_constraint::is_grid_compatible(num_shards, nrow, ncol)) {
            ingress_auto="grid";
            ingress_ptr = new distributed_constrained_random_ingress<VertexData, EdgeData>(rpc.dc(), *this, "grid");
          } else {
            ingress_auto="oblivious";
            ingress_ptr = new distributed_oblivious_ingress<VertexData, EdgeData>(rpc.dc(), *this, usehash, userecent);
          }
          if (rpc.procid() == 0) logstream(LOG_EMPH) << "Automatically determine ingress method: "
                                                     << ingress_auto << std::endl;
        }
        // batch ingress is deprecated
        // if (method == "batch") {
        //   logstream(LOG_EMPH) << "Use batch ingress, bufsize: " << bufsize
        //     << ", usehash: " << usehash << ", userecent" << userecent << std::endl;
        //   ingress_ptr = new distributed_batch_ingress<VertexData, EdgeData>(rpc.dc(), *this,
        //                                                bufsize, usehash, userecent);
        // } else
      } // end of set ingress method

      /** \internal
          This internal function loads all lines from an input stream,
          feeding each non-empty line to the user supplied line parser.
          Returns false on the first parse failure. */
      template<typename Fstream>
      bool load_from_stream(std::string filename, Fstream& fin,
                            line_parser_type& line_parser) {
        size_t linecount = 0;
        timer ti; ti.start();
        while(fin.good() && !fin.eof()) {
          std::string line;
          std::getline(fin, line);
          if(line.empty()) continue;
          if(fin.fail()) break;
          const bool success = line_parser(*this, filename, line);
          if (!success) {
            logstream(LOG_WARNING)
                << "Error parsing line " << linecount << " in "
                << filename << ": " << std::endl
                << "\t\"" << line << "\"" << std::endl;
            return false;
          }
          ++linecount;
          // progress report roughly every 5 seconds
          if (ti.current_time() > 5.0) {
            logstream(LOG_INFO) << linecount << " Lines read" << std::endl;
            ti.start();
          }
        }
        return true;
      } // end of load from stream

      /** \internal Writes one vertex via the user supplied writer. */
      template<typename Fstream, typename Writer>
      void save_vertex_to_stream(vertex_type& vertex, Fstream& fout, Writer writer) {
        fout << writer.save_vertex(vertex);
      } // end of save_vertex_to_stream

      /** \internal Writes one edge via the user supplied writer. */
      template<typename Fstream, typename Writer>
      void save_edge_to_stream(edge_type& edge, Fstream& fout, Writer writer) {
        std::string ret = writer.save_edge(edge);
        fout << ret;
      } // end of save_edge_to_stream

      /** \internal Writes this machine's edges (and owned disconnected
       *  vertices, marked with destination 0xFFFFFFFF) as pairs of
       *  little 4-byte ids. */
      void save_bintsv4_to_stream(std::ostream& out) {
        for (int i = 0; i < (int)local_graph.num_vertices(); ++i) {
          uint32_t src = l_vertex(i).global_id();
          foreach(local_edge_type e, l_vertex(i).out_edges()) {
            uint32_t dest = e.target().global_id();
            out.write(reinterpret_cast<char*>(&src), 4);
            out.write(reinterpret_cast<char*>(&dest), 4);
          }
          if (l_vertex(i).owner() == rpc.procid()) {
            vertex_type gv = vertex_type(l_vertex(i));
            // store disconnected vertices if I am the master of the vertex
            if (gv.num_in_edges() == 0 && gv.num_out_edges() == 0) {
              out.write(reinterpret_cast<char*>(&src), 4);
              uint32_t dest = (uint32_t)(-1);
              out.write(reinterpret_cast<char*>(&dest), 4);
            }
          }
        }
      }

      /** \internal Reads 4-byte id pairs written by save_bintsv4_to_stream.
       *  A destination of 0xFFFFFFFF denotes a lone vertex. */
      bool load_bintsv4_from_stream(std::istream& in) {
        while(in.good()) {
          uint32_t src, dest;
          in.read(reinterpret_cast<char*>(&src), 4);
          in.read(reinterpret_cast<char*>(&dest), 4);
          if (in.fail()) break;
          if (dest == (uint32_t)(-1)) {
            add_vertex(src);
          }
          else {
            add_edge(src, dest);
          }
        }
        return true;
      }

      /** \brief Saves a distributed graph using a direct ostream saving function
       *
       * Each machine K (of N) saves one file named
       * \li [prefix]_K_of_N  (K is 1-based)
       *
       * These files can be loaded with direct_stream_load().
       */
      void save_direct(const std::string& prefix, bool gzip,
                       boost::function<void (graph_type*, std::ostream&)> saver) {
        rpc.full_barrier();
        finalize();
        timer savetime;  savetime.start();
        // one file per machine: [prefix]_K_of_N (K is 1-based), ".gz" if gzip
        std::string fname = prefix + "_" + tostr(rpc.procid() + 1) + "_of_" +
                            tostr(rpc.numprocs());
        if (gzip) fname = fname + ".gz";
        logstream(LOG_INFO) << "Save graph to " << fname << std::endl;
        if(boost::starts_with(fname, "hdfs://")) {
          graphlab::hdfs hdfs;
          graphlab::hdfs::fstream out_file(hdfs, fname, true);
          boost::iostreams::filtering_stream<boost::iostreams::output> fout;
          if (gzip) fout.push(boost::iostreams::gzip_compressor());
          fout.push(out_file);
          if (!fout.good()) {
            logstream(LOG_FATAL) << "\n\tError opening file: " << fname << std::endl;
            exit(-1);
          }
          saver(this, boost::ref(fout));
          // pop in reverse order of push: sink first, then the compressor
          fout.pop();
          if (gzip) fout.pop();
          out_file.close();
        } else {
          std::ofstream out_file(fname.c_str(),
                                 std::ios_base::out | std::ios_base::binary);
          if (!out_file.good()) {
            logstream(LOG_FATAL) << "\n\tError opening file: "
                                 << fname << std::endl;
            exit(-1);
          }
          boost::iostreams::filtering_stream<boost::iostreams::output> fout;
          if (gzip) fout.push(boost::iostreams::gzip_compressor());
          fout.push(out_file);
          saver(this, boost::ref(fout));
          fout.pop();
          if (gzip) fout.pop();
          out_file.close();
        }
        logstream(LOG_INFO) << "Finish saving graph to " << fname << std::endl
                            << "Finished saving bintsv4 graph: "
                            << savetime.current_time() << std::endl;
        rpc.full_barrier();
      } // end of save

      /**
       * \brief Load a graph from a collection of files in stored on
       * the filesystem using the user defined line parser. Like
       * \ref load(const std::string& path, line_parser_type line_parser)
       * but only loads from the filesystem.
       */
      void load_direct_from_posixfs(std::string prefix,
                                    boost::function<bool (graph_type*, std::istream&)> parser) {
        std::string directory_name; std::string original_path(prefix);
        boost::filesystem::path path(prefix);
        std::string search_prefix;
        if (boost::filesystem::is_directory(path)) {
          // if this is a directory
          // force a "/" at the end of the path
          // make sure to check that the path is non-empty. (you do not
          // want to make the empty path "" the root path "/" )
          directory_name = path.native();
        }
        else {
          // separate the prefix into directory + filename-prefix
          directory_name = path.parent_path().native();
          search_prefix = path.filename().native();
          directory_name = (directory_name.empty() ? "." : directory_name);
        }
        std::vector<std::string> graph_files;
        fs_util::list_files_with_prefix(directory_name, search_prefix, graph_files);
        if (graph_files.size() == 0) {
          logstream(LOG_WARNING) << "No files found matching " << original_path << std::endl;
        }
        // round-robin assignment of files to machines
        for(size_t i = 0; i < graph_files.size(); ++i) {
          if (i % rpc.numprocs() == rpc.procid()) {
            logstream(LOG_EMPH) << "Loading graph from file: " << graph_files[i] << std::endl;
            // is it a gzip file ?
            const bool gzip = boost::ends_with(graph_files[i], ".gz");
            // open the stream
            std::ifstream in_file(graph_files[i].c_str(),
                                  std::ios_base::in | std::ios_base::binary);
            // attach gzip if the file is gzip
            boost::iostreams::filtering_stream<boost::iostreams::input> fin;
            // Using gzip filter
            if (gzip) fin.push(boost::iostreams::gzip_decompressor());
            fin.push(in_file);
            const bool success = parser(this, boost::ref(fin));
            if(!success) {
              logstream(LOG_FATAL)
                  << "\n\tError parsing file: " << graph_files[i] << std::endl;
            }
            fin.pop();
            if (gzip) fin.pop();
          }
        }
        rpc.full_barrier();
      }

      /**
       * \brief Load a graph from a collection of files in stored on
       * the HDFS using the user defined line parser. Like
       * \ref load(const std::string& path, line_parser_type line_parser)
       * but only loads from HDFS.
       */
      void load_direct_from_hdfs(std::string prefix,
                                 boost::function<bool (graph_type*, std::istream&)> parser) {
        // force a "/" at the end of the path
        // make sure to check that the path is non-empty. (you do not
        // want to make the empty path "" the root path "/" )
        std::string path = prefix;
        if (path.length() > 0 && path[path.length() - 1] != '/') path = path + "/";
        if(!hdfs::has_hadoop()) {
          logstream(LOG_FATAL)
              << "\n\tAttempting to load a graph from HDFS but GraphLab"
              << "\n\twas built without HDFS." << std::endl;
        }
        hdfs& hdfs = hdfs::get_hdfs();
        std::vector<std::string> graph_files;
        graph_files = hdfs.list_files(path);
        if (graph_files.size() == 0) {
          logstream(LOG_WARNING) << "No files found matching " << prefix << std::endl;
        }
        // round-robin assignment of files to machines
        for(size_t i = 0; i < graph_files.size(); ++i) {
          if (i % rpc.numprocs() == rpc.procid()) {
            logstream(LOG_EMPH) << "Loading graph from file: " << graph_files[i] << std::endl;
            // is it a gzip file ?
            const bool gzip = boost::ends_with(graph_files[i], ".gz");
            // open the stream
            graphlab::hdfs::fstream in_file(hdfs, graph_files[i]);
            boost::iostreams::filtering_stream<boost::iostreams::input> fin;
            if(gzip) fin.push(boost::iostreams::gzip_decompressor());
            fin.push(in_file);
            const bool success = parser(this, boost::ref(fin));
            if(!success) {
              logstream(LOG_FATAL)
                  << "\n\tError parsing file: " << graph_files[i] << std::endl;
            }
            fin.pop();
            if (gzip) fin.pop();
          }
        }
        rpc.full_barrier();
      }

      /** \internal Dispatches to the HDFS or posix loader based on prefix. */
      void load_direct(std::string prefix,
                       boost::function<bool (graph_type*, std::istream&)> parser) {
        rpc.full_barrier();
        if(boost::starts_with(prefix, "hdfs://")) {
          load_direct_from_hdfs(prefix, parser);
        } else {
          load_direct_from_posixfs(prefix, parser);
        }
        rpc.full_barrier();
      } // end of load

      friend class tests::distributed_graph_test;
    }; // End of graph
  } // end of namespace graphlab

#include <graphlab/macros_undef.hpp>
#endif



================================================
FILE: src/graphlab/graph/dynamic_local_graph.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.  See the License for the specific language
 *  governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_DYNAMIC_LOCAL_GRAPH_HPP
#define GRAPHLAB_DYNAMIC_LOCAL_GRAPH_HPP

#include <cmath>

#include <string>
#include <list>
#include <vector>
#include <set>
#include <map>

#include <queue>
#include <algorithm>
#include <functional>
#include <fstream>

#include <boost/bind.hpp>
#include <boost/unordered_set.hpp>
#include <boost/type_traits.hpp>
#include <boost/typeof/typeof.hpp>
#include <boost/iterator/transform_iterator.hpp>
#include <boost/iterator/counting_iterator.hpp>
#include <boost/iterator/zip_iterator.hpp>
#include <boost/range/iterator_range.hpp>

#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/graph/local_edge_buffer.hpp>

#include <graphlab/util/random.hpp>
#include <graphlab/util/generics/shuffle.hpp>
#include <graphlab/util/generics/counting_sort.hpp>
#include <graphlab/util/generics/dynamic_csr_storage.hpp>

#include <graphlab/parallel/atomic.hpp>

#include <graphlab/logger/logger.hpp>
#include <graphlab/logger/assertions.hpp>

#include <graphlab/serialization/iarchive.hpp>
#include <graphlab/serialization/oarchive.hpp>

#include <graphlab/util/random.hpp>

#include <graphlab/macros_def.hpp>

namespace graphlab {

  // A local (single-machine) graph supporting incremental edge insertion
  // backed by dynamic CSR/CSC storage.
  template<typename VertexData, typename EdgeData>
  class dynamic_local_graph {
  public:

    /** The type of the vertex data stored in the local_graph. */
    typedef VertexData vertex_data_type;

    /** The type of the edge data stored in the local_graph. */
    typedef EdgeData edge_data_type;

    typedef graphlab::vertex_id_type vertex_id_type;
    typedef graphlab::edge_id_type edge_id_type;

  private:
    class edge_iterator;

  public:
    typedef boost::iterator_range<edge_iterator> edge_list_type;

    /** Vertex object which provides access to the vertex data
     * and information about it. */
    class vertex_type;
    /** Edge object which provides access to the edge data
     * and information about it. */
    class edge_type;

  public:

    // CONSTRUCTORS ============================================================>

    /** Create an empty local_graph. */
    dynamic_local_graph() { }

    /** Create a local_graph with nverts vertices. */
    dynamic_local_graph(size_t nverts) : vertices(nverts) {}

    // METHODS =================================================================>

    static bool is_dynamic() {
      return true;
    }

    /**
     * \brief Resets the local_graph state.
     */
    void clear() {
      vertices.clear();
      edges.clear();
      _csc_storage.clear();
      _csr_storage.clear();
      // swap with temporaries to actually release the allocated memory
      std::vector<VertexData>().swap(vertices);
      std::vector<EdgeData>().swap(edges);
      edge_buffer.clear();
    }

    /** \brief Get the number of vertices */
    size_t num_vertices() const {
      return vertices.size();
    } // end of num vertices

    /** \brief Get the number of edges */
    size_t num_edges() const {
        return edges.size();
    } // end of num edges

    /**
     * \brief Creates a vertex containing the vertex data and returns the id
     * of the new vertex id. Vertex ids are assigned in increasing order with
     * the first vertex having id 0.
*/ void add_vertex(lvid_type vid, const VertexData& vdata = VertexData() ) { if(vid >= vertices.size()) { // Enable capacity doubling if resizing beyond capacity if(vid >= vertices.capacity()) { const size_t new_size = std::max(2 * vertices.capacity(), size_t(vid)); vertices.reserve(new_size); } vertices.resize(vid+1); } vertices[vid] = vdata; } // End of add vertex; void reserve(size_t num_vertices) { ASSERT_GE(num_vertices, vertices.size()); vertices.reserve(num_vertices); } /** * \brief Add additional vertices up to provided num_vertices. This will * fail if resizing down. */ void resize(size_t num_vertices ) { ASSERT_GE(num_vertices, vertices.size()); vertices.resize(num_vertices); } // End of resize void reserve_edge_space(size_t n) { edge_buffer.reserve_edge_space(n); } /** * \brief Creates an edge connecting vertex source to vertex target. Any * existing data will be cleared. Should not be called after finalization. */ edge_id_type add_edge(lvid_type source, lvid_type target, const EdgeData& edata = EdgeData()) { if(source == target) { logstream(LOG_FATAL) << "Attempting to add self edge (" << source << " -> " << target << "). " << "This operation is not permitted in GraphLab!" << std::endl; ASSERT_MSG(source != target, "Attempting to add self edge!"); } if(source >= vertices.size() || target >= vertices.size()) add_vertex(std::max(source, target)); // Add the edge to the set of edge data (this copies the edata) edge_buffer.add_edge(source, target, edata); // This is not the final edge_id, so we always return 0. return 0; } // End of add edge /** * \brief Add edges in block. 
     */
    void add_edges(const std::vector<lvid_type>& src_arr,
                   const std::vector<lvid_type>& dst_arr,
                   const std::vector<EdgeData>& edata_arr) {
      // all three arrays must be parallel (same length)
      ASSERT_TRUE((src_arr.size() == dst_arr.size())
                  && (src_arr.size() == edata_arr.size()));
      // validate every edge before buffering any of them
      for (size_t i = 0; i < src_arr.size(); ++i) {
        lvid_type source = src_arr[i];
        lvid_type target = dst_arr[i];
        if ( source >= vertices.size() || target >= vertices.size() ) {
          logstream(LOG_FATAL)
            << "Attempting add_edge (" << source << " -> " << target
            << ") when there are only " << vertices.size() << " vertices"
            << std::endl;
          ASSERT_MSG(source < vertices.size(), "Invalid source vertex!");
          ASSERT_MSG(target < vertices.size(), "Invalid target vertex!");
        }
        if(source == target) {
          logstream(LOG_FATAL)
            << "Attempting to add self edge (" << source << " -> " << target << "). "
            << "This operation is not permitted in GraphLab!" << std::endl;
          ASSERT_MSG(source != target, "Attempting to add self edge!");
        }
      }
      edge_buffer.add_block_edges(src_arr, dst_arr, edata_arr);
    } // End of add block edges

    /** \brief Returns a vertex of given ID. */
    vertex_type vertex(lvid_type vid) {
      ASSERT_LT(vid, vertices.size());
      return vertex_type(*this, vid);
    }

    /** \brief Returns a vertex of given ID. */
    const vertex_type vertex(lvid_type vid) const {
      ASSERT_LT(vid, vertices.size());
      return vertex_type(*this, vid);
    }

    /** \brief Returns a reference to the data stored on the vertex v. */
    VertexData& vertex_data(lvid_type v) {
      ASSERT_LT(v, vertices.size());
      return vertices[v];
    } // end of data(v)

    /** \brief Returns a constant reference to the data stored on the vertex v. */
    const VertexData& vertex_data(lvid_type v) const {
      ASSERT_LT(v, vertices.size());
      return vertices[v];
    } // end of data(v)

    /**
     * \brief Finalize the local_graph data structure by
     * sorting edges to maximize the efficiency of graphlab.
     * This function takes O(|V|log(degree)) time and will
     * fail if there are any duplicate edges.
     * Detail implementation depends on the type of graph_storage.
     * This is also automatically invoked by the engine at start.
     */
    void finalize() {
      graphlab::timer mytimer; mytimer.start();
#ifdef DEBUG_GRAPH
      logstream(LOG_DEBUG) << "Graph2 finalize starts." << std::endl;
#endif
      std::vector<edge_id_type> src_permute;
      std::vector<edge_id_type> dest_permute;
      std::vector<edge_id_type> src_counting_prefix_sum;
      std::vector<edge_id_type> dest_counting_prefix_sum;

#ifdef DEBUG_GRAPH
      logstream(LOG_DEBUG) << "Graph2 finalize: Sort by source vertex" << std::endl;
#endif
      // NOTE(review): the permutation names look swapped relative to their
      // use — sorting by source fills `dest_permute` (used for CSR) and
      // sorting by target fills `src_permute` (used for CSC) — but the
      // subsequent code is consistent with this naming.
      counting_sort(edge_buffer.source_arr, dest_permute, &src_counting_prefix_sum);
#ifdef DEBUG_GRAPH
      logstream(LOG_DEBUG) << "Graph2 finalize: Sort by dest id" << std::endl;
#endif
      counting_sort(edge_buffer.target_arr, src_permute, &dest_counting_prefix_sum);

      // build (neighbor, edge-id) value arrays for CSR (by source) and
      // CSC (by target); new edge ids continue after the existing ones
      std::vector< std::pair<lvid_type, edge_id_type> > csr_values;
      std::vector< std::pair<lvid_type, edge_id_type> > csc_values;
      csr_values.reserve(dest_permute.size());
      edge_id_type begineid = edges.size();
      for (size_t i = 0; i < dest_permute.size(); ++i) {
        csr_values.push_back(std::pair<lvid_type, edge_id_type>
                             (edge_buffer.target_arr[dest_permute[i]],
                              begineid + dest_permute[i]));
      }
      csc_values.reserve(src_permute.size());
      for (size_t i = 0; i < src_permute.size(); ++i) {
        csc_values.push_back(std::pair<lvid_type, edge_id_type>
                             (edge_buffer.source_arr[src_permute[i]],
                              begineid + src_permute[i]));
      }
      ASSERT_EQ(csc_values.size(), csr_values.size());

      // fast path with first time insertion.
      if (edges.size() == 0) {
        edges.swap(edge_buffer.data);
        edge_buffer.clear();
        // warp into csr csc storage.
        _csr_storage.wrap(src_counting_prefix_sum, csr_values);
        _csc_storage.wrap(dest_counting_prefix_sum, csc_values);
      } else {
        // insert edge data
        edges.reserve(edges.size() + edge_buffer.size());
        edges.insert(edges.end(), edge_buffer.data.begin(), edge_buffer.data.end());
        std::vector<EdgeData>().swap(edge_buffer.data);
        edge_buffer.clear();
        // splice the per-vertex ranges (delimited by the prefix sums) into
        // the existing dynamic CSR/CSC storage
        size_t begin, end;
        for (size_t i = 0; i < src_counting_prefix_sum.size(); ++i) {
          begin = src_counting_prefix_sum[i];
          end = (i==src_counting_prefix_sum.size()-1) ? csr_values.size()
                                                      : src_counting_prefix_sum[i+1];
          if (end > begin) {
            _csr_storage.insert(i, csr_values.begin()+begin, csr_values.begin()+end);
          }
        }
        for (size_t i = 0; i < dest_counting_prefix_sum.size(); ++i) {
          begin = dest_counting_prefix_sum[i];
          end = (i==dest_counting_prefix_sum.size()-1) ? csc_values.size()
                                                       : dest_counting_prefix_sum[i+1];
          if (end > begin) {
            _csc_storage.insert(i, csc_values.begin()+begin, csc_values.begin()+end);
          }
        }
        _csr_storage.repack();
        _csc_storage.repack();
      }
      ASSERT_EQ(_csr_storage.num_values(), _csc_storage.num_values());
      ASSERT_EQ(_csr_storage.num_values(), edges.size());
#ifdef DEBUG_GRAPH
      logstream(LOG_DEBUG) << "End of finalize." << std::endl;
#endif
      logstream(LOG_INFO) << "Graph finalized in " << mytimer.current_time()
                          << " secs" << std::endl;
#ifdef DEBUG_GRAPH
      _csr_storage.meminfo(std::cerr);
      _csc_storage.meminfo(std::cerr);
#endif
    } // End of finalize

    /** \brief Load the local_graph from an archive */
    void load(iarchive& arc) {
      clear();
      // read the vertices
      arc >> vertices
          >> edges
          >> _csr_storage
          >> _csc_storage;
    } // end of load

    /** \brief Save the local_graph to an archive */
    void save(oarchive& arc) const {
      // Write the number of edges and vertices
      arc << vertices
          << edges
          << _csr_storage
          << _csc_storage;
    } // end of save

    /** swap two graphs */
    void swap(dynamic_local_graph& other) {
      std::swap(vertices, other.vertices);
      std::swap(edges, other.edges);
      std::swap(_csr_storage, other._csr_storage);
      std::swap(_csc_storage, other._csc_storage);
    } // end of swap

    /** \brief Load the local_graph from a file */
    void load(const std::string& filename) {
      std::ifstream fin(filename.c_str());
      iarchive iarc(fin);
      iarc >> *this;
      fin.close();
    } // end of load

    /**
     * \brief save the local_graph to the file given by the filename
     */
    void save(const std::string& filename) const {
      std::ofstream fout(filename.c_str());
      oarchive oarc(fout);
      oarc << *this;
      fout.close();
    } // end of save

    /**
     * \brief save the adjacency structure to a text file.
     *
     * Save the adjacency structure as a text file in:
     *    src_Id, dest_Id \n
     *    src_Id, dest_Id \n
     * format.
*/
void save_adjacency(const std::string& filename) const {
  std::ofstream fout(filename.c_str());
  ASSERT_TRUE(fout.good());
  // Walk every vertex and emit one "src, dest" line per out-edge.
  for (size_t i = 0; i < num_vertices(); ++i) {
    // NOTE(review): the vertex_type constructor visible in this file takes
    // (graph, vid); confirm this one-argument construction resolves to the
    // intended overload.
    vertex_type v(i);
    edge_list_type ls = v.out_edges();
    foreach(edge_type e, ls) {
      fout << (lvid_type)i << ", " << e.target().id() << "\n";
      // Stop immediately if the stream went bad (e.g. disk full).
      ASSERT_TRUE(fout.good());
    }
  }
  fout.close();
}

/****************************************************************************
 * Internal Functions                                                       *
 * ----------------------                                                   *
 * These functions and types provide internal access to the                 *
 * underlying local_graph representation. They should not be used unless you*
 * *really* know what you are doing.                                        *
 ****************************************************************************/

/**
 * \internal
 * \brief Returns the number of in edges of the vertex with the given id.
 */
size_t num_in_edges(const lvid_type v) const {
  // In-degree = length of v's row in the CSC (in-edge) storage.
  return _csc_storage.begin(v).pdistance_to(_csc_storage.end(v));
}

/**
 * \internal
 * \brief Returns the number of out edges of the vertex with the given id.
 */
size_t num_out_edges(const lvid_type v) const {
  // Out-degree = length of v's row in the CSR (out-edge) storage.
  return _csr_storage.begin(v).pdistance_to(_csr_storage.end(v));
}

/**
 * \internal
 * \brief Returns a list of in edges of the vertex with the given id.
 */
edge_list_type in_edges(lvid_type v) {
  // Wrap v's CSC row in a pair of edge_iterators.
  edge_iterator begin = edge_iterator(*this, edge_iterator::CSC,
                                      _csc_storage.begin(v), v);
  edge_iterator end = edge_iterator(*this, edge_iterator::CSC,
                                    _csc_storage.end(v), v);
  return boost::make_iterator_range(begin, end);
}

/**
 * \internal
 * \brief Returns a list of out edges of the vertex with the given id.
*/
edge_list_type out_edges(lvid_type v) {
  // Wrap v's CSR row in a pair of edge_iterators.
  edge_iterator begin = edge_iterator(*this, edge_iterator::CSR,
                                      _csr_storage.begin(v), v);
  edge_iterator end = edge_iterator(*this, edge_iterator::CSR,
                                    _csr_storage.end(v), v);
  return boost::make_iterator_range(begin, end);
}

/**
 * \internal
 * \brief Returns edge data of edge_type e
 */
EdgeData& edge_data(edge_id_type eid) {
  ASSERT_LT(eid, num_edges());
  return edges[eid];
}

/**
 * \internal
 * \brief Returns const edge data of edge_type e
 */
const EdgeData& edge_data(edge_id_type eid) const {
  ASSERT_LT(eid, num_edges());
  return edges[eid];
}

/**
 * \internal
 * \brief Returns the estimated memory footprint of the local_graph.
 */
size_t estimate_sizeof() const {
  // Vertex list: container overhead plus capacity (not size) of data.
  const size_t vlist_size = sizeof(vertices) +
    sizeof(VertexData) * vertices.capacity();
  // Edge storage: both adjacency structures plus the edge-data vector.
  size_t elist_size = _csr_storage.estimate_sizeof() +
    _csc_storage.estimate_sizeof() +
    sizeof(edges) + sizeof(EdgeData)*edges.capacity();
  // Un-finalized edges still sitting in the insertion buffer.
  size_t ebuffer_size = edge_buffer.estimate_sizeof();
  return vlist_size + elist_size + ebuffer_size;
}

/** \internal
 * \brief For debug purpose, returns the largest vertex id in the edge_buffer
 */
const lvid_type maxlvid() const {
  if (edge_buffer.size()) {
    lvid_type max(0);
    foreach(lvid_type i, edge_buffer.source_arr)
      max = std::max(max, i);
    foreach(lvid_type i, edge_buffer.target_arr)
      max = std::max(max, i);
    return max;
  } else {
    // Sentinel: wraps to the largest representable lvid when empty.
    return lvid_type(-1);
  }
}

private:
/**
 * \internal
 * CSR/CSC storage types. Each stored value pairs a neighbor vertex id
 * with the id of the connecting edge.
 */
typedef dynamic_csr_storage<std::pair<lvid_type, edge_id_type>, edge_id_type>
    csr_type;
typedef typename csr_type::iterator csr_edge_iterator;

// PRIVATE DATA MEMBERS ===================================================> //

/** The vertex data is simply a vector of vertex data */
std::vector<VertexData> vertices;

/** Stores the edge data and edge relationships. */
csr_type _csr_storage;
csr_type _csc_storage;

// Edge data indexed by edge id; adjacency rows above refer into this.
std::vector<EdgeData> edges;

/** The edge data is a vector of edges where each edge stores its source,
    destination, and data.
Used for temporary storage. The data is transferred into CSR+CSC representation in Finalize. This will be cleared after finalized.*/ local_edge_buffer<VertexData, EdgeData> edge_buffer; /**************************************************************************/ /* */ /* declare friends */ /* */ /**************************************************************************/ friend class local_graph_test; }; // End of class dynamic_local_graph template<typename VertexData, typename EdgeData> std::ostream& operator<<(std::ostream& out, const dynamic_local_graph<VertexData, EdgeData>& local_graph) { for(lvid_type vid = 0; vid < local_graph.num_vertices(); ++vid) { foreach(edge_id_type eid, local_graph.out_edge_ids(vid)) out << vid << ", " << local_graph.target(eid) << '\n'; } return out; } } // end of namespace graphlab /////////////////////// Implementation of Helper Class //////////////////////////// namespace graphlab { template<typename VertexData, typename EdgeData> class dynamic_local_graph<VertexData, EdgeData>::vertex_type { public: vertex_type(dynamic_local_graph& lgraph_ref, lvid_type vid):lgraph_ref(lgraph_ref),vid(vid) { } /// \brief Returns a constant reference to the data on the vertex. const vertex_data_type& data() const { return lgraph_ref.vertex_data(vid); } /// \brief Returns a reference to the data on the vertex. vertex_data_type& data() { return lgraph_ref.vertex_data(vid); } /// \brief Returns the number of in edges of the vertex. size_t num_in_edges() const { return lgraph_ref.num_in_edges(vid); } /// \brief Returns the number of out edges of the vertex. size_t num_out_edges() const { return lgraph_ref.num_out_edges(vid); } /// \brief Returns the ID of the vertex. lvid_type id() const { return vid; } /// \brief Returns a list of in edges. edge_list_type in_edges() { return lgraph_ref.in_edges(vid); } /// \brief Returns a list of out edges. 
edge_list_type out_edges() { return lgraph_ref.out_edges(vid); }

private:
// The graph this vertex view belongs to.
dynamic_local_graph& lgraph_ref;
// Local id of the wrapped vertex.
lvid_type vid;
};

/**
 * Lightweight view of a single edge: stores the endpoint ids and the
 * edge id, and resolves the edge data through the owning graph on demand.
 */
template<typename VertexData, typename EdgeData>
class dynamic_local_graph<VertexData, EdgeData>::edge_type {
 public:
  edge_type(dynamic_local_graph& lgraph_ref,
            lvid_type _source, lvid_type _target, edge_id_type _eid) :
      lgraph_ref(lgraph_ref), _source(_source),
      _target(_target), _eid(_eid) { }

  /// \brief Returns a constant reference to the data on the edge.
  const edge_data_type& data() const { return lgraph_ref.edge_data(_eid); }

  /// \brief Returns a reference to the data on the edge.
  edge_data_type& data() { return lgraph_ref.edge_data(_eid); }

  /// \brief Returns the source vertex of the edge.
  vertex_type source() const { return vertex_type(lgraph_ref, _source); }

  /// \brief Returns the target vertex of the edge.
  vertex_type target() const { return vertex_type(lgraph_ref, _target); }

  /// \brief Returns the internal ID of this edge
  edge_id_type id() const { return _eid; }

 private:
  dynamic_local_graph& lgraph_ref;
  lvid_type _source;
  lvid_type _target;
  edge_id_type _eid;
};

/**
 * Random-access iterator over one CSR or CSC row. Dereferencing yields an
 * edge_type by value (note the fourth iterator_facade argument), not a
 * reference.
 */
template<typename VertexData, typename EdgeData>
class dynamic_local_graph<VertexData, EdgeData>::edge_iterator :
    public boost::iterator_facade < edge_iterator,
                                    edge_type,
                                    boost::random_access_traversal_tag,
                                    edge_type> {
 public:
  // Which adjacency structure the iterator walks: out-edges (CSR) or
  // in-edges (CSC).
  enum list_type {CSR, CSC};

  edge_iterator(dynamic_local_graph& lgraph_ref, list_type _type,
                csr_edge_iterator _iter, lvid_type _vid) :
      lgraph_ref(lgraph_ref), _type(_type), _iter(_iter), _vid(_vid) {}

 private:
  friend class boost::iterator_core_access;

  void increment() { ++_iter; }
  bool equal(const edge_iterator& other) const {
    // Comparing iterators over different adjacency lists is a logic error.
    ASSERT_EQ(_type, other._type);
    return _iter == other._iter;
  }
  edge_type dereference() const { return make_value(); }
  void advance(int n) { _iter += n; }
  ptrdiff_t distance_to(const edge_iterator& other) const {
    return (other._iter - _iter);
  }

 private:
  // Materialize an edge_type for the current position; _vid supplies the
  // fixed endpoint (source for CSR rows, target for CSC rows).
  edge_type make_value() const {
    typename csr_edge_iterator::reference ref =
*_iter; switch (_type) { case CSC: { return edge_type(lgraph_ref, ref.first, _vid, ref.second); } case CSR: { return edge_type(lgraph_ref, _vid, ref.first, ref.second); } default: return edge_type(lgraph_ref, -1, -1, -1); } } dynamic_local_graph& lgraph_ref; const list_type _type; csr_edge_iterator _iter; const lvid_type _vid; }; // end of edge_iterator } // end of namespace namespace std { /** * Swap two graphs */ template<typename VertexData, typename EdgeData> inline void swap(graphlab::dynamic_local_graph<VertexData,EdgeData>& a, graphlab::dynamic_local_graph<VertexData,EdgeData>& b) { a.swap(b); } // end of swap }; // end of namespace std #include <graphlab/macros_undef.hpp> #endif // Insert edges into finalized graph // if (finalized) { // graphlab::timer mytimer; mytimer.start(); // #ifdef DEBUG_GRAPH // logstream(LOG_INFO) << "Insert edges into finalized graph..." << std::endl; // #endif // // insert adjacency into csr/csc // for (size_t i = 0; i < edge_buffer.size(); ++i) { // edge_id_type eid = edges.size() + i; // _csr_storage.insert(edge_buffer.source_arr[i], // std::pair<lvid_type, edge_id_type>( // edge_buffer.target_arr[i], eid)); // _csc_storage.insert(edge_buffer.target_arr[i], // std::pair<lvid_type,edge_id_type>( // edge_buffer.source_arr[i], eid)); // } // // // insert edge data // edges.reserve(edges.size() + edge_buffer.size()); // edges.insert(edges.end(), edge_buffer.data.begin(), edge_buffer.data.end()); // std::vector<EdgeData>().swap(edge_buffer.data); // // edge_buffer.clear(); // // #ifdef DEBGU_GRAPH // logstream(LOG_DEBUG) << "Finish finalization." 
<< std::endl; // #endif // logstream(LOG_INFO) << "Graph finalized in " << mytimer.current_time() // << " secs" << std::endl; // _csr_storage.meminfo(std::cerr); // _csc_storage.meminfo(std::cerr); // return; // } ================================================ FILE: src/graphlab/graph/graph_basic_types.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_GRAPH_BASIC_TYPES #define GRAPHLAB_GRAPH_BASIC_TYPES #include <stdint.h> namespace graphlab { #ifdef USE_VID32 /// Identifier type of a vertex which is globally consistent. Guaranteed to be integral typedef uint32_t vertex_id_type; #else typedef uint64_t vertex_id_type; #endif /// Identifier type of a vertex which is only locally consistent. Guaranteed to be integral typedef vertex_id_type lvid_type; /** * Identifier type of an edge which is only locally * consistent. Guaranteed to be integral and consecutive. */ typedef lvid_type edge_id_type; /** * \brief The set of edges that are traversed during gather and scatter * operations. */ enum edge_dir_type { /** * \brief No edges implies that no edges are processed during the * corresponding gather or scatter phase, essentially skipping * that phase. 
*/
NO_EDGES = 0,
/**
 * \brief In edges implies that only edges whose target is the center
 * vertex are processed during gather or scatter.
 */
IN_EDGES = 1,
/**
 * \brief Out edges implies that only edges whose source is the center
 * vertex are processed during gather or scatter.
 */
OUT_EDGES = 2,
/**
 * \brief All edges implies that all edges adjacent to the
 * center vertex are processed on gather or scatter. Note that
 * some neighbors may be encountered twice if there is both an in
 * and out edge to that neighbor.
 */
ALL_EDGES = 3};

} // end of namespace graphlab

#endif


================================================
FILE: src/graphlab/graph/graph_formats.dox
================================================
/**

\page graph_formats Graph File Formats

We build in support for 3 common portable graph file formats (tsv, snap,
adj), one GraphLab specific portable format (bintsv4) as well as 2 GraphLab
specific non-portable formats (graphjrl, bin).

\section graph_portable_formats Portable Formats

All portable graph file formats supported are unable to store graph data,
but can only store graph structure. The formats currently with built-in
support are "tsv", "snap", "adj" and "bintsv4", described below. Graphs of
this format can be saved / loaded using
graphlab::distributed_graph::save_format() and
graphlab::distributed_graph::load_format() functions.

"tsv", "snap" and "adj" are text formats and are human readable. "bintsv4"
is a binary format.

\subsection graph_tsv_format tsv (edge list)

The TSV format is a simple edge-list between vertices where each line in
the file is a [src ID] [target ID] pair separated by whitespace.

For instance, the following graph:

\image html graph_format_example.gif

can be stored as:

\verbatim
1 2
1 5
7 5
5 7
7 1
\endverbatim

Note that vertex IDs do not need to be consecutive, and edges may appear in
any arbitrary order. Furthermore, the graph specification requires that
vertex IDs are 32-bit integers ranging from 0 to (2^32 - 2).
The ID (2^32 - 1) is reserved.

Empty lines in the file are permissible, but no other symbols are permitted.

Observe that the TSV format cannot store vertices with no edges.

\subsection graph_snap_format snap (edge list)

The SNAP file format is supported to simplify the use of datasets from the
<a href=http://snap.stanford.edu/data/>Stanford Large Network Dataset
Collection</a>.

The format is identical to \ref graph_tsv_format "tsv" with one minor
difference: lines beginning with "#" are treated as comments and ignored.

For instance, the following graph:

\image html graph_format_example.gif

can be stored as:

\verbatim
# example graph
# vertices: 4 edges: 5
1 2
1 5
7 5
5 7
7 1
\endverbatim

Note that while the file includes a vertex and an edge count in this
example, they are treated as comments and ignored.

Observe that the SNAP format cannot store vertices with no edges.

\subsection graph_adj_format adj (adjacency list)

The Adjacency list file format stores on each line, a source vertex,
followed by a list of all target vertices: each line has the following
format:

\verbatim
[vertex ID] [number of target vertices] [target ID 1] [target ID 2] [target ID 3] ...
\endverbatim

This format is more compact to store, and in practice will partition better
in the distributed setting (since edges have a natural grouping).
Furthermore, this format is capable of storing disconnected vertices.

For instance, the following graph:

\image html graph_format_example.gif

can be stored as:

\verbatim
1 2 2 5
7 2 7 1
5 1 7
\endverbatim

We may include the line

\verbatim
2 0
\endverbatim

to identify that vertex 2 has no out-edges. However, this is optional since
vertex 2 will be created when the edge <tt>1->2</tt> is created. Such lines
are only necessary for truly disconnected vertices.

\subsection graph_bintsv4_format bintsv4 (binary edge list)

The bintsv4 format is a binary storage format.
The graph is represented as a sequence of 8 byte blocks: \verbatim --------------------------------- | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | --------------------------------- | src VID | dest VID | --------------------------------- \endverbatim Where each block stores a pair of 32 bit unsigned integer values in x86 little endian format. Each block represents an edge src -> dest. Vertex IDs cannot take on the value 2^32-1 Disconnected vertices are stored as: \verbatim --------------------------------- | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | --------------------------------- | VID | 2^32 - 1 | --------------------------------- \endverbatim \section graph_nonportable_formats Non-Portable Formats The non-portable formats store all information in the graph including the graph data. These formats are convenient and in the case of the "bin" format significantly faster to load. However, they are also brittle in that changes to your vertex/edge data serialization will render any saved files unreadable. Also, we may make changes to the serialization binary format at any point. These formats should be treated as fast, temporary storage methods and must not be used for long-term archival. \subsection graph_format_graphjrl graphjrl (Graph Journal) The GraphJRL format serializes each vertex and edge onto a <tt>\n</tt> terminated line, ensuring to escape any <tt>\n</tt> characters that occur within the serialized data. This format is not human readable and should be treated as temporary storage. Unlike the "bin" format below, graphs saved in this format do not require the same number of machines to load the graph. i.e. graphs saved using 8 machines can be loaded using any arbitrary number of machines. \subsection graph_format_bin bin (Distributed Graph Binary) This format is simply a direct serialization of all Distributed Graph datastructures. The graph is finalized before saving, and thus do not need to be finalized after loading. 
This is the most efficient storage of the distributed graph, requiring the least loading time. However, the disadvantage of the "bin" format is that it requires exactly the same number of machines to load the graph as there was when saving the graph. In other words, if 8 machines were used to save the graph, it must be loaded using exactly 8 machines. */ ================================================ FILE: src/graphlab/graph/graph_gather_apply.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_GRAPH_GATHER_APPLY_HPP #define GRAPHLAB_GRAPH_GATHER_APPLY_HPP #include <deque> #include <boost/bind.hpp> #include <graphlab/options/graphlab_options.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/graph/vertex_set.hpp> #include <graphlab/util/memory_info.hpp> #include <graphlab/parallel/thread_pool.hpp> #include <graphlab/rpc/dc_dist_object.hpp> #include <graphlab/rpc/buffered_exchange.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { template<typename Graph, typename GatherType> class graph_gather_apply { public: /** * \brief The type of graph supported by this vertex program * * See graphlab::distributed_graph */ typedef Graph graph_type; /** * \brief Graph related types */ typedef typename graph_type::vertex_type vertex_type; typedef typename graph_type::vertex_data_type vertex_data_type; typedef typename graph_type::mirror_type mirror_type; typedef typename graph_type::local_vertex_type local_vertex_type; typedef typename graph_type::local_edge_type local_edge_type; typedef typename graph_type::lvid_type lvid_type; /** * \brief The result type of the gather operation. */ typedef GatherType gather_type; /** * \brief The type of the gather function. */ typedef typename boost::function<gather_type (lvid_type, graph_type&)> gather_fun_type; /** * \brief The gather operation which will be called on each vertex (master and mirrors) and send to the master vertex. */ gather_fun_type gather_fun; /** * \brief The type of the apply function. */ typedef typename boost::function<void (lvid_type, const gather_type& accum, graph_type&)> apply_fun_type; /** * \brief The apply operation which will be called on each vertex (master and mirrors) with the result of gather. */ apply_fun_type apply_fun; private: /** * \brief The object used to communicate with remote copies of the * synchronous engine. 
*/ dc_dist_object< graph_gather_apply<Graph,GatherType> > rmi; /** * \brief A reference to the distributed graph on which this * synchronous engine is running. */ graph_type& graph; /** * \brief The local worker threads used by this engine */ thread_pool threads; /** * \brief A thread barrier that is used to control the threads in the * thread pool. */ graphlab::barrier thread_barrier; /** * \brief The shared counter used coordinate operations between * threads. */ atomic<size_t> shared_lvid_counter; /** * \brief The vertex locks protect access to vertex specific data-structrues including * \ref graphlab::graph_gather_apply::gather_accum. */ std::vector<simple_spinlock> vlocks; /** * \brief Bit indicating if the gather has accumulator contains any values. * * While dense bitsets are thread safe the value of this bit must change concurrently with * the \ref graphlab::graph_gather_apply and therefore is set while holding the lock in * \ref graphlab::graph_gather_apply::vlocks */ dense_bitset has_gather_accum; /** * \brief Gather accumulator used for each master vertex to merge the result of all the machine * specific accumulators. * * The gather accumulator can be accessed by multiple threads at once and therefore must be guarded * by a vertex locks in \ref graphlab::graph_gather_apply::vlocks */ std::vector<gather_type> gather_accum; /** * \brief The pair type used to synchronize the results of the gather phase */ typedef std::pair<vertex_id_type, gather_type> vid_gather_pair_type; /** * \brief The type of the exchange used to synchronize gather * accumulators */ typedef buffered_exchange<vid_gather_pair_type> gather_exchange_type; /** * \brief The distributed exchange used to synchronize gather * accumulators. */ gather_exchange_type gather_exchange; public: /** * \brief Construct a graph gather_apply operation with a given graph and * gather apply functions. 
* * * In the distributed engine the synchronous engine must be called * on all machines at the same time (in the same order) passing * the \ref graphlab::distributed_control object. Upon * construction the synchronous engine allocates several * data-structures to store messages, gather accumulants, and * vertex programs and therefore may require considerable memory. * * The number of threads to create are read from * \ref graphlab_options::get_ncpus "opts.get_ncpus()". * * See the <a href="#engineopts">main class documentation</a> * for details on the available options. * * @param [in] dc Distributed controller to associate with * @param [in,out] graph A reference to the graph object that this * engine will modify. The graph must be fully constructed and * finalized. * @param [in] opts A graphlab::graphlab_options object specifying engine * parameters. This is typically constructed using * \ref graphlab::command_line_options. */ graph_gather_apply(graph_type& graph, gather_fun_type gather_fun, apply_fun_type apply_fun, const graphlab_options& opts = graphlab_options()); /** * \brief Start execution of the synchronous engine. * * The start function begins computation and does not return until * there are no remaining messages or until max_iterations has * been reached. * * The start() function modifies the data graph through the vertex * programs and so upon return the data graph should contain the * result of the computation. * * @return The reason for termination */ void exec(const vertex_set& vset = vertex_set(true)); private: // Program Steps ========================================================== /** * \brief Executes ncpus copies of a member function each with a * unique consecutive id (thread id). * * This function is used by the main loop to execute each of the * stages in parallel. 
* The member function must have the type:
*
* \code
* void graph_gather_apply::member_fun(size_t threadid, const vertex_set& vset);
* \endcode
*
* This function runs an rmi barrier after termination
*
* @tparam the type of the member function.
* @param [in] member_fun the function to call.
*/
template<typename MemberFunction>
void run_synchronous(MemberFunction member_fun, const vertex_set& vset) {
  // Reset the work-stealing cursor shared by all worker threads.
  shared_lvid_counter = 0;
  if (threads.size() <= 1) {
    // Single-threaded: run inline on the calling thread as thread 0.
    (this->*(member_fun))(0, vset);
  } else {
    // launch the initialization threads
    for(size_t i = 0; i < threads.size(); ++i) {
      boost::function<void(void)> invoke =
          boost::bind(member_fun, this, i, vset);
      threads.launch(invoke, i);
    }
  }
  // Wait for all threads to finish
  threads.join();
  rmi.barrier();
} // end of run_synchronous

/**
 * \brief Execute the \ref graphlab::graph_gather_apply::gather_fun function
 * on all vertices in the vset. The result of the gather will be sent to the
 * master proc of the vertex.
 *
 * The accumulators are stored in
 * \ref graphlab::graph_gather_apply::gather_accum if the vertex is
 * pre-allocated in the local graph of the proc. Otherwise, it will be stored
 * in \ref graphlab::graph_gather_apply::temporary_gather_map and be merged
 * in the graph.
 *
 * @param thread_id the thread to run this as which determines
 * @param vset the vertex set specifying the set of vertex to run the gather
 *        operation.
 */
void execute_gathers(const size_t thread_id, const vertex_set& vset);

/**
 * \brief Scatter the gather accumulator from master to the mirrors.
 *
 * @param thread_id the thread to run this as which determines
 * @param vset the vertex set specifying the set of vertex to run the gather
 *        operation.
 */
void execute_scatters(const size_t thread_id, const vertex_set& vset);

/**
 * \brief Execute the \ref graphlab::graph_gather_apply::apply_fun function
 * on all vertices (masters and mirrors) with the synchronized gather
 * accumulators.
* * @param thread_id the thread to run this as which determines * @param vset the vertex set specifying the set of vertex to run the gather operation. */ void execute_applys(const size_t thread_id, const vertex_set& vset); // Data Synchronization =================================================== /** * \brief Send the gather value for the vertex id to its master. * * @param [in] lvid the vertex to send the gather value to * @param [in] accum the locally computed gather value. */ void sync_gather(lvid_type lvid, const gather_type& accum, size_t thread_id); /** * \brief Receive the gather values from the buffered exchange. * * This function returns when there is nothing left in the * buffered exchange and should be called after the buffered * exchange has been flushed */ void recv_gathers(const bool try_to_recv = false); /** * \brief Send the gather values from master to mirrors. */ void scatter_gather(lvid_type lvid, const gather_type& accum, size_t thread_id); }; // end of class synchronous engine /** * Constructs an synchronous distributed engine. * The number of threads to create are read from * opts::get_ncpus(). * * Valid engine options (graphlab_options::get_engine_args()): * \arg \c max_iterations Sets the maximum number of iterations the * engine will run for. * \arg \c use_cache If set to true, partial gathers are cached. * See \ref gather_caching to understand the behavior of the * gather caching model and how it may be used to accelerate program * performance. * * \param dc Distributed controller to associate with * \param graph The graph to schedule over. The graph must be fully * constructed and finalized. * \param opts A graphlab_options object containing options and parameters * for the engine. 
*/ template<typename Graph, typename GatherType> graph_gather_apply<Graph,GatherType>::graph_gather_apply( graph_type& graph, gather_fun_type gather_fun, apply_fun_type apply_fun, const graphlab_options& opts) : gather_fun(gather_fun), apply_fun(apply_fun), rmi(graph.dc(), this), graph(graph), threads(opts.get_ncpus()), thread_barrier(opts.get_ncpus()), gather_exchange(graph.dc(), opts.get_ncpus()) { } template<typename Graph, typename GatherType> void graph_gather_apply<Graph,GatherType>::exec(const vertex_set& vset) { if (vset.lazy && !vset.is_complete_set) return; gather_accum.clear(); // Allocate vertex locks and vertex programs vlocks.resize(graph.num_local_vertices()); // Allocate gather accumulators and accumulator bitset gather_accum.resize(graph.num_local_vertices(), gather_type()); has_gather_accum.resize(graph.num_local_vertices()); has_gather_accum.clear(); rmi.barrier(); // Execute gather operations------------------------------------------- // Execute the gather operation for all vertices that are active // in this minor-step (active-minorstep bit set). // if (rmi.procid() == 0) std::cout << "Gathering..." << std::endl; run_synchronous(&graph_gather_apply::execute_gathers, vset); // Execute the gather operation for all vertices that are active // in this minor-step (active-minorstep bit set). // if (rmi.procid() == 0) std::cout << "Gathering..." << std::endl; run_synchronous(&graph_gather_apply::execute_scatters, vset); // Execute Apply Operations ------------------------------------------- // Run the apply function on all active vertices // if (rmi.procid() == 0) std::cout << "Applying..." << std::endl; run_synchronous(&graph_gather_apply::execute_applys, vset); /** * Post conditions: * 1) any changes to the vertex data have been synchronized * with all mirrors. 
* 2) all gather accumulators have been cleared */ // Final barrier to ensure that all engines terminate at the same time rmi.full_barrier(); } // end of start template<typename Graph, typename GatherType> void graph_gather_apply<Graph, GatherType>:: execute_gathers(const size_t thread_id, const vertex_set& vset) { const bool TRY_TO_RECV = true; const size_t TRY_RECV_MOD = 1000; size_t vcount = 0; timer ti; fixed_dense_bitset<8 * sizeof(size_t)> local_bitset; while (1) { // increment by a word at a time lvid_type lvid_block_start = shared_lvid_counter.inc_ret_last(8 * sizeof(size_t)); if (lvid_block_start >= graph.num_local_vertices()) break; local_bitset.clear(); if (vset.lazy) { ASSERT_TRUE(vset.is_complete_set); local_bitset.fill(); } else { // get the bit field from has_message size_t lvid_bit_block = vset.localvset.containing_word(lvid_block_start); if (lvid_bit_block == 0) continue; // initialize a word sized bitfield local_bitset.initialize_from_mem(&lvid_bit_block, sizeof(size_t)); } foreach(size_t lvid_block_offset, local_bitset) { lvid_type lvid = lvid_block_start + lvid_block_offset; if (lvid >= graph.num_local_vertices()) break; // std::cout << "proc " << rmi.procid() << " gather on lvid " << lvid << std::endl; gather_type accum = gather_fun(lvid, graph); // If the accum contains a value for the local gather we put // that estimate in the gather exchange. 
sync_gather(lvid, accum, thread_id); // try to recv gathers if there are any in the buffer if(++vcount % TRY_RECV_MOD == 0) recv_gathers(TRY_TO_RECV); } } // end of loop over vertices to compute gather accumulators gather_exchange.partial_flush(thread_id); // Finish sending and receiving all gather operations thread_barrier.wait(); if(thread_id == 0) gather_exchange.flush(); thread_barrier.wait(); recv_gathers(); } // end of execute_gathers template<typename Graph, typename GatherType> void graph_gather_apply<Graph, GatherType>:: execute_scatters(const size_t thread_id, const vertex_set& vset) { const bool TRY_TO_RECV = true; const size_t TRY_RECV_MOD = 1000; size_t vcount = 0; timer ti; fixed_dense_bitset<8 * sizeof(size_t)> local_bitset; while (1) { // increment by a word at a time lvid_type lvid_block_start = shared_lvid_counter.inc_ret_last(8 * sizeof(size_t)); if (lvid_block_start >= graph.num_local_vertices()) break; local_bitset.clear(); if (vset.lazy) { ASSERT_TRUE(vset.is_complete_set); local_bitset.fill(); } else { // get the bit field from has_message size_t lvid_bit_block = vset.localvset.containing_word(lvid_block_start); if (lvid_bit_block == 0) continue; // initialize a word sized bitfield local_bitset.initialize_from_mem(&lvid_bit_block, sizeof(size_t)); } foreach(size_t lvid_block_offset, local_bitset) { lvid_type lvid = lvid_block_start + lvid_block_offset; if (lvid >= graph.num_local_vertices()) break; if (graph.l_is_master(lvid)) { const gather_type& accum = gather_accum[lvid]; apply_fun(lvid, accum, graph); scatter_gather(lvid, accum, thread_id); // try to recv gathers if there are any in the buffer if(++vcount % TRY_RECV_MOD == 0) recv_gathers(TRY_TO_RECV); } } } // end of loop over vertices to compute gather accumulators gather_exchange.partial_flush(thread_id); // Finish sending and receiving all gather operations thread_barrier.wait(); if(thread_id == 0) gather_exchange.flush(); thread_barrier.wait(); recv_gathers(); } // end of 
execute_scatters

template<typename Graph, typename GatherType>
void graph_gather_apply<Graph,GatherType>::
execute_applys(const size_t thread_id, const vertex_set& vset) {
  fixed_dense_bitset<8 * sizeof(size_t)> local_bitset;
  while (1) {
    // increment by a word at a time
    lvid_type lvid_block_start =
        shared_lvid_counter.inc_ret_last(8 * sizeof(size_t));
    if (lvid_block_start >= graph.num_local_vertices()) break;
    if (vset.lazy) {
      // A lazy vset must represent the complete vertex set.
      ASSERT_TRUE(vset.is_complete_set);
      local_bitset.fill();
    } else {
      // get the bit field from has_message
      size_t lvid_bit_block = vset.localvset.containing_word(lvid_block_start);
      if (lvid_bit_block == 0) continue;
      // initialize a word sized bitfield
      local_bitset.initialize_from_mem(&lvid_bit_block, sizeof(size_t));
    }
    foreach(size_t lvid_block_offset, local_bitset) {
      lvid_type lvid = lvid_block_start + lvid_block_offset;
      if (lvid >= graph.num_local_vertices()) break;
      // Masters already ran apply_fun inside execute_scatters, so only
      // mirrors apply here, using the accumulator scattered from the master.
      if (graph.l_is_master(lvid)) continue;
      // vertex_type vertex(graph.l_vertex(lvid));
      // Get the local accumulator. Note that it is possible that
      // the gather_accum was not set during the gather.
const gather_type& accum = gather_accum[lvid]; apply_fun(lvid, accum, graph); } } // end of loop over vertices to run apply } // end of execute_applys // Data Synchronization =================================================== template<typename Graph, typename GatherType> void graph_gather_apply<Graph,GatherType>:: sync_gather(lvid_type lvid, const gather_type& accum, const size_t thread_id) { if(graph.l_is_master(lvid)) { vlocks[lvid].lock(); if(has_gather_accum.get(lvid)) { gather_accum[lvid] += accum; } else { gather_accum[lvid] = accum; has_gather_accum.set_bit(lvid); } vlocks[lvid].unlock(); } else { const procid_t master = graph.l_master(lvid); const vertex_id_type vid = graph.global_vid(lvid); gather_exchange.send(master, std::make_pair(vid, accum), thread_id); } } // end of sync_gather template<typename Graph, typename GatherType> void graph_gather_apply<Graph, GatherType>:: scatter_gather(lvid_type lvid, const gather_type& accum, const size_t thread_id) { ASSERT_TRUE(graph.l_is_master(lvid)); const vertex_id_type vid = graph.global_vid(lvid); local_vertex_type vertex = graph.l_vertex(lvid); foreach(const procid_t& mirror, vertex.mirrors()) { gather_exchange.send(mirror, std::make_pair(vid, accum), thread_id); } } // end of sync_gather template<typename Graph, typename GatherType> void graph_gather_apply<Graph,GatherType>:: recv_gathers(const bool try_to_recv) { procid_t procid(-1); typename gather_exchange_type::buffer_type buffer; while(gather_exchange.recv(procid, buffer, try_to_recv)) { foreach(const vid_gather_pair_type& pair, buffer) { ASSERT_TRUE(graph.vid2lvid.find(pair.first) != graph.vid2lvid.end()); const lvid_type lvid = graph.local_vid(pair.first); const gather_type& accum = pair.second; vlocks[lvid].lock(); if( has_gather_accum.get(lvid) ) { gather_accum[lvid] += accum; } else { gather_accum[lvid] = accum; has_gather_accum.set_bit(lvid); } vlocks[lvid].unlock(); } } } // end of recv_gather }; // namespace #include <graphlab/macros_undef.hpp> 
#endif // Remove the capability of merging flying gather accumulators. /** * \brief Temporary map storing the gather accumulators that is not preallocated in the * \ref graphlab::graph_gather_apply::gather_accum. The key of the map is the global vertex id. * * This map can be accessed by multiple threads at once and therefore must be guarded by a * lock \ref graphlab::graph_gather_apply::tmp_gather_map_lock. */ // boost::unordered_map<vertex_id_type, gather_type> tmp_gather_map; /** * \brief Lock that protects access to the temporaroy gather accumulator map \ref graphlab::graph_gather_apply::tmp_gather_map. */ // mutex tmp_gather_map_lock; // /** // * \brief Merge the gather accumulators in \ref graphlab::graph_gather_apply::tmp_gather_accum // * in to \ref graphlab::graph_gather_apply::gahter_accum. Resize the local graph and the associated // * data structures (\ref graphlab::distributed_graph::vid2lvid, // * \ref graphlab::distributed_graph::lvid2record)with the new vertex. // */ // void merge_temporary_gather_map(); // template<typename Graph, typename GatherType> // void graph_gather_apply<Graph, GatherType>:: // merge_temporary_gather_map() { // // merge in the tmp_gather_map // typename boost::unordered_map<vertex_id_type, gather_type>::const_iterator // it = tmp_gather_map.begin(); // // resize the graph // size_t new_size = graph.num_local_vertices() + tmp_gather_map.size(); // gather_accum.resize(new_size); // graph.lvid2record.resize(new_size); // graph.local_graph.resize(new_size); // for (; it != tmp_gather_map.end(); ++it) { // lvid_type lvid = graph.vid2lvid.size(); // // update graph // graph.vid2lvid[it->first] = lvid; // graph.lvid2record[lvid].gvid = it->first; // graph.lvid2record[lvid].owner = rmi.procid(); // // update gather accum vector // gather_accum[lvid] = it->second; // } // tmp_gather_map.clear(); // } // // Merge in the temporaroy gather map------------------------------------------- // // with the side effect of resizing the graph 
and associate vertex datastructures. // merge_temporary_gather_map(); // if the vid does not exist on the proc, put in a temporary map and merge in later // if (graph.vid2lvid.find(pair.first) == graph.vid2lvid.end()) { // tmp_gather_map_lock.lock(); // if (tmp_gather_map.find(pair.first) == tmp_gather_map.end()) { // tmp_gather_map[pair.first] = pair.second; // } else { // tmp_gather_map[pair.first] += pair.second; // } // tmp_gather_map_lock.unlock(); // } else { ================================================ FILE: src/graphlab/graph/graph_hash.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_GRAPH_HASH_UTIL #define GRAPHLAB_GRAPH_HASH_UTIL #include <boost/function.hpp> #include <boost/functional/hash.hpp> #include <graphlab/util/integer_mix.hpp> namespace graphlab { namespace graph_hash { /** \brief Returns the hashed value of a vertex. */ inline static size_t hash_vertex (const vertex_id_type vid) { return integer_mix(vid); } /** \brief Returns the hashed value of an edge. 
*/ inline static size_t hash_edge (const std::pair<vertex_id_type, vertex_id_type>& e, const uint32_t seed = 5) { // a bunch of random numbers #if (__SIZEOF_PTRDIFF_T__ == 8) static const size_t a[8] = {0x6306AA9DFC13C8E7, 0xA8CD7FBCA2A9FFD4, 0x40D341EB597ECDDC, 0x99CFA1168AF8DA7E, 0x7C55BCC3AF531D42, 0x1BC49DB0842A21DD, 0x2181F03B1DEE299F, 0xD524D92CBFEC63E9}; #else static const size_t a[8] = {0xFC13C8E7, 0xA2A9FFD4, 0x597ECDDC, 0x8AF8DA7E, 0xAF531D42, 0x842A21DD, 0x1DEE299F, 0xBFEC63E9}; #endif vertex_id_type src = e.first; vertex_id_type dst = e.second; return (integer_mix(src^a[seed%8]))^(integer_mix(dst^a[(seed+1)%8])); } } // end of graph_hash namespace } // end of graphlab namespace #endif ================================================ FILE: src/graphlab/graph/graph_includes.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_GRAPH_INCLUDES #define GRAPHLAB_GRAPH_INCLUDES #include <graphlab/graph/distributed_graph.hpp> #include <graphlab/graph/vertex_set.hpp> #endif ================================================ FILE: src/graphlab/graph/graph_ops.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * \file graph_ops.hpp * * This file supports basic graph io operations to simplify reading * and writing adjacency structures from files. * */ #ifndef GRAPHLAB_GRAPH_OPS_HPP #define GRAPHLAB_GRAPH_OPS_HPP #include <iostream> #include <fstream> #include <string> #include <boost/algorithm/string/predicate.hpp> #include <graphlab/graph/distributed_graph.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { namespace graph_ops { /** * builds a topological_sort of the graph returning it in topsort. * * \param[out] topsort Resultant topological sort of the graph vertices. * * function will return false if graph is not acyclic. 
*/ template <typename VertexType, typename EdgeType> bool topological_sort(const distributed_graph<VertexType, EdgeType>& graph, std::vector<vertex_id_type>& topsort) { typedef distributed_graph<VertexType, EdgeType> graph_type; topsort.clear(); topsort.reserve(graph.num_vertices()); std::vector<size_t> indeg; indeg.resize(graph.num_vertices()); std::queue<vertex_id_type> q; for (size_t i = 0;i < graph.num_vertices(); ++i) { indeg[i] = graph.get_in_edges(i).size(); if (indeg[i] == 0) { q.push(i); } } while (!q.empty()) { vertex_id_type v = q.front(); q.pop(); topsort.push_back(v); foreach(typename graph_type::edge_type edge, graph.get_out_edges(v)) { vertex_id_type destv = edge.target(); --indeg[destv]; if (indeg[destv] == 0) { q.push(destv); } } } if (q.empty() && topsort.size() != graph.num_vertices()) { return false; } return true; } // end of topological sort template <typename VertexType, typename EdgeType> size_t num_neighbors(const distributed_graph<VertexType, EdgeType>& graph, vertex_id_type& vid) { typedef distributed_graph<VertexType, EdgeType> graph_type; typename graph_type::edge_list_type in_edges = graph.in_edges(vid); typename graph_type::edge_list_type out_edges = graph.out_edges(vid); typename graph_type::edge_list_type::const_iterator i = in_edges.begin(); typename graph_type::edge_list_type::const_iterator j = out_edges.begin(); size_t count = 0; for( ; i != in_edges.end() && j != out_edges.end(); ++count) { if(i->source() == j->target()) { ++i; ++j; } else if(i->source() < j->target()) { ++i; } else { ++j; } } for( ; i != in_edges.end(); ++i, ++count); for( ; j != out_edges.end(); ++j, ++count); return count; } // end of num_neighbors template <typename VertexType, typename EdgeType> void neighbors(const distributed_graph<VertexType, EdgeType>& graph, const vertex_id_type vid, std::vector<vertex_id_type>& neighbors ) { typedef distributed_graph<VertexType, EdgeType> graph_type; typename graph_type::edge_list_type in_edges = graph.in_edges(vid); 
typename graph_type::edge_list_type out_edges = graph.out_edges(vid); typename graph_type::edge_list_type::const_iterator i = in_edges.begin(); typename graph_type::edge_list_type::const_iterator j = out_edges.begin(); while(i != in_edges.end() && j != out_edges.end()) { if(i->source() == j->target()) { neighbors.push_back(i->source()); ++i; ++j; } else if(i->source() < j->target()) { neighbors.push_back(i->source()); ++i; } else { neighbors.push_back(j->target()); ++j; } } for( ; i != in_edges.end(); ++i) neighbors.push_back(i->source()); for( ; j != out_edges.end(); ++j) neighbors.push_back(j->target()); } // end of neighbors template <typename VertexType, typename EdgeType> bool save_metis_structure(const std::string& filename, const distributed_graph<VertexType, EdgeType>& graph) { typedef distributed_graph<VertexType, EdgeType> graph_type; typedef typename graph_type::edge_type edge_type; typedef typename graph_type::edge_list_type edge_list_type; std::ofstream fout(filename.c_str()); if(!fout.good()) return false; // Count the number of actual edges size_t nedges = 0; for(vertex_id_type i = 0; i < graph.num_vertices(); ++i) nedges += num_neighbors(graph, i); fout << graph.num_vertices() << ' ' << (nedges/2) << '\n'; // Save the adjacency structure std::vector<vertex_id_type> neighbor_set; for(vertex_id_type i = 0; i < graph.num_vertices(); ++i) { neighbors(graph, i, neighbor_set); for(size_t j = 0; j < neighbor_set.size(); ++j) { fout << (neighbor_set[j] + 1); if(j + 1 < neighbor_set.size()) fout << ' '; } fout << '\n'; } fout.close(); return true; } // end of save metis template <typename VertexType, typename EdgeType> bool save_edge_list_structure(const std::string& filename, const distributed_graph<VertexType, EdgeType>& graph) { typedef distributed_graph<VertexType, EdgeType> graph_type; typedef typename graph_type::edge_type edge_type; typedef typename graph_type::edge_list_type edge_list_type; std::ofstream fout(filename.c_str()); if(!fout.good()) 
return false; for(vertex_id_type i = 0; i < graph.num_vertices(); ++i) foreach(edge_type edge, graph.out_edges(i)) fout << edge.source() << '\t' << edge.target() << '\n'; fout.close(); return true; } // end of save metis template <typename VertexType, typename EdgeType> bool save_zoltan_hypergraph_structure(const std::string& filename, const distributed_graph<VertexType, EdgeType>& graph) { typedef distributed_graph<VertexType, EdgeType> graph_type; typedef typename graph_type::edge_type edge_type; typedef typename graph_type::edge_list_type edge_list_type; std::ofstream fout(filename.c_str()); if(!fout.good()) return false; // ok. I need to uniquely number each edge. // how? boost::unordered_map<std::pair<vertex_id_type, vertex_id_type>, size_t> edgetoid; size_t curid = 0; for(vertex_id_type i = 0; i < graph.num_vertices(); ++i) { foreach(const typename graph_type::edge_type& edge, graph.in_edges(i)) { std::pair<vertex_id_type, vertex_id_type> e = std::make_pair(edge.source(), edge.target()); if (e.first > e.second) std::swap(e.first, e.second); if (edgetoid.find(e) == edgetoid.end()) { edgetoid[e] = curid; ++curid; } } foreach(const typename graph_type::edge_type& edge, graph.out_edges(i)) { std::pair<vertex_id_type, vertex_id_type> e = std::make_pair(edge.source(), edge.target()); if (e.first > e.second) std::swap(e.first, e.second); if (edgetoid.find(e) == edgetoid.end()) { edgetoid[e] = curid; ++curid; } } } size_t numedges = curid; // each edge is a vertex, each vertex is an edge // a pin is total adjacency of a hyper edge fout << numedges << "\n\n"; for (size_t i = 0;i < numedges; ++i) { fout << i+1 << "\n"; } fout << "\n"; fout << graph.num_vertices() << "\n\n"; fout << numedges * 2 << "\n\n"; // loop over the "hyperedge" and write out the edges it is adjacent to for(vertex_id_type i = 0; i < graph.num_vertices(); ++i) { boost::unordered_set<size_t> adjedges; foreach(const typename graph_type::edge_type& edge, graph.in_edges(i)) { std::pair<vertex_id_type, 
vertex_id_type> e = std::make_pair(edge.source(), edge.target()); if (e.first > e.second) std::swap(e.first, e.second); adjedges.insert(edgetoid[e]); } foreach(const typename graph_type::edge_type& edge, graph.out_edges(i)) { std::pair<vertex_id_type, vertex_id_type> e = std::make_pair(edge.source(), edge.target()); if (e.first > e.second) std::swap(e.first, e.second); adjedges.insert(edgetoid[e]); } // write std::vector<size_t> adjedgesvec; std::copy(adjedges.begin(), adjedges.end(), std::inserter(adjedgesvec, adjedgesvec.end())); fout << i+1 << " " << adjedgesvec.size() << "\t"; for (size_t j = 0;j < adjedgesvec.size(); ++j) { fout << adjedgesvec[j] + 1; if (j < adjedgesvec.size() - 1) fout << "\t"; } fout << "\n"; } fout.close(); return true; } // end of save_zoltan_hypergraph_structure }; // end of graph ops }; // end of namespace graphlab #include <graphlab/macros_undef.hpp> #endif ================================================ FILE: src/graphlab/graph/graph_storage_deprecated.hpp ================================================ /** * Copyright (c) 2011 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /* * * Author: Haijie Gu (haijieg@cs.cmu.edu) * Date: 04/08/2013 * * CSR+CSC implementation of a graph storage. 
* */ #ifndef GRAPHLAB_GRAPH_STORAGE_HPP #define GRAPHLAB_GRAPH_STORAGE_HPP #ifndef __NO_OPENMP__ #include <omp.h> #endif #include <cmath> #include <string> #include <list> #include <vector> #include <set> #include <map> #include <queue> #include <algorithm> #include <functional> #include <boost/version.hpp> #include <boost/bind.hpp> #include <boost/unordered_set.hpp> #include <boost/iterator/zip_iterator.hpp> #include <boost/iterator/counting_iterator.hpp> #include <boost/iterator/transform_iterator.hpp> #include <boost/iterator/iterator_facade.hpp> #include <graphlab/graph/local_edge_buffer.hpp> #include <graphlab/graph/graph_basic_types.hpp> #include <graphlab/logger/logger.hpp> #include <graphlab/logger/assertions.hpp> #include <graphlab/serialization/iarchive.hpp> #include <graphlab/serialization/oarchive.hpp> #include <graphlab/util/random.hpp> #include <graphlab/util/generics/shuffle.hpp> #include <graphlab/util/generics/counting_sort.hpp> #include <graphlab/util/generics/vector_zip.hpp> #include <graphlab/util/generics/csr_storage.hpp> #include <graphlab/parallel/atomic.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { template<typename VertexData, typename EdgeData> class graph_storage { public: typedef graphlab::lvid_type lvid_type; typedef graphlab::edge_id_type edge_id_type; /** The type of the edge data stored in the graph. */ typedef EdgeData edge_data_type; /** The type of the vertex data stored in the graph. 
*/ typedef VertexData vertex_data_type; /** * \internal * CSR/CSC storage types */ typedef csr_storage<lvid_type, edge_id_type> csr_type; typedef csr_storage<std::pair<lvid_type, edge_id_type>, edge_id_type> csc_type; /* ----------------------------------------------------------------------------- */ /* helper data field and structures: edge_data_list, class edge, class edge_list */ /* ----------------------------------------------------------------------------- */ class edge_type { public: edge_type () : _source(-1), _target(-1), _eid(-1) {} edge_type(lvid_type _source, lvid_type _target, edge_id_type _eid) : _source(_source), _target(_target), _eid(_eid) { } inline bool is_empty() { return _eid == -1; } lvid_type source() const { return _source; } lvid_type target() const { return _target; } edge_id_type id() const { return _eid; } private: lvid_type _source; lvid_type _target; edge_id_type _eid; }; typedef boost::tuple<typename csr_type::iterator, boost::counting_iterator<edge_id_type> > csr_iterator_tuple; typedef boost::zip_iterator<csr_iterator_tuple> csr_zip_iterator; csr_zip_iterator make_csr_zip_iterator(typename csr_type::iterator iter, edge_id_type beginid) { return csr_zip_iterator( csr_iterator_tuple(iter, boost::counting_iterator<edge_id_type>(beginid))); } struct make_edge_type_csr_functor { typedef typename csr_zip_iterator::value_type const& argument_type; typedef edge_type result_type; make_edge_type_csr_functor() : sourceid(-1) { } make_edge_type_csr_functor(lvid_type sourceid) : sourceid(sourceid) { } result_type operator() (argument_type arg) const { lvid_type destid = arg.template get<0>(); edge_id_type eid = arg.template get<1>(); return edge_type(sourceid, destid, eid); } lvid_type sourceid; }; struct make_edge_type_csc_functor { typedef typename csc_type::value_type& argument_type; typedef edge_type result_type; make_edge_type_csc_functor() : destid(-1) {} make_edge_type_csc_functor(lvid_type destid) : destid(destid) { } result_type 
operator() (argument_type arg) const { return edge_type(arg.first, destid, arg.second); } lvid_type destid; }; typedef boost::transform_iterator<make_edge_type_csr_functor, csr_zip_iterator> csr_edge_iterator; typedef boost::transform_iterator<make_edge_type_csc_functor, typename csc_type::iterator> csc_edge_iterator; class edge_iterator : public boost::iterator_facade < edge_iterator, edge_type, boost::random_access_traversal_tag, edge_type> { public: edge_iterator() : _type(UNDEFINED) {} edge_iterator(csc_edge_iterator iter) : _type(CSC), csc_iter(iter) {} edge_iterator(csr_edge_iterator iter) : _type(CSR), csr_iter(iter) {} private: friend class boost::iterator_core_access; void increment() { switch (_type) { case CSC: ++csc_iter; break; case CSR: ++csr_iter; break; default: return; } } bool equal(const edge_iterator& other) const { ASSERT_EQ(_type, other._type); switch (_type) { case CSC: return csc_iter == other.csc_iter; case CSR: return csr_iter == other.csr_iter; default: return true; } } edge_type dereference() const { switch (_type) { case CSC: return *csc_iter; case CSR: return *csr_iter; default: return edge_type(); } } void decrement() { switch (_type) { case CSC: --csc_iter; break; case CSR: --csr_iter; break; default: return; } } void advance(int n) { switch (_type) { case CSC: csc_iter+=n; break; case CSR: csr_iter+=n; break; default: return; } } int distance_to(const edge_iterator& other) const { switch (_type) { case CSC: return other.csc_iter - csc_iter; case CSR: return other.csr_iter - csr_iter; default: return 0; } } private: enum list_type {CSR, CSC, UNDEFINED}; list_type _type; csc_edge_iterator csc_iter; csr_edge_iterator csr_iter; }; // end of edge_iterator class edge_list { public: edge_list(edge_iterator _begin, edge_iterator _end) : _begin(_begin), _end(_end) {} typedef edge_iterator iterator; typedef iterator const_iterator; inline size_t size() const { return _end - _begin; } inline edge_type operator[](size_t i) const { return 
*(_begin+i); } bool is_empty() const { return size() == 0; } iterator begin() const { return _begin; } iterator end() const { return _end; } private: edge_iterator _begin; edge_iterator _end; }; public: // CONSTRUCTORS ============================================================> graph_storage() { } // METHODS =================================================================> /** \brief Returns the number of edges in the graph. */ size_t num_edges() const { return _edata_storage.size(); } /** \brief Returns the number of vertices in the graph. */ size_t num_vertices() const { return _csr_storage.num_keys(); } /** \brief Returns the number of in edges of the vertex. */ size_t num_in_edges (const lvid_type v) const { return (_csc_storage.end(v) - _csc_storage.begin(v)); } /** \brief Returns the number of out edges of the vertex. */ size_t num_out_edges (const lvid_type v) const { return (_csr_storage.end(v) - _csr_storage.begin(v)); } /** \brief Returns a list of in edges of a vertex. */ edge_list in_edges(const lvid_type v) { make_edge_type_csc_functor functor(v); // make_edge_type_csc_functor functor; csc_edge_iterator begin = boost::make_transform_iterator (_csc_storage.begin(v), functor); csc_edge_iterator end = boost::make_transform_iterator (_csc_storage.end(v), functor); return edge_list ( edge_iterator(begin), edge_iterator(end)); } /** \brief Returns a list of out edges of a vertex. 
*/ edge_list out_edges(const lvid_type v) { make_edge_type_csr_functor functor(v); csr_zip_iterator beginiter = make_csr_zip_iterator(_csr_storage.begin(v), _csr_storage.begin(v)-_csr_storage.begin(0)); csr_zip_iterator enditer = make_csr_zip_iterator(_csr_storage.end(v), _csr_storage.end(v)-_csr_storage.begin(0)); csr_edge_iterator begin = boost::make_transform_iterator ( beginiter, functor); csr_edge_iterator end = boost::make_transform_iterator ( enditer, functor); return edge_list ( edge_iterator(begin), edge_iterator(end)); } /** \brief Returns edge data of edge_type e*/ EdgeData& edge_data(edge_id_type eid) { ASSERT_LT(eid, num_edges()); return _edata_storage[eid]; } const EdgeData& edge_data(edge_id_type eid) const { ASSERT_LT(eid, num_edges()); return _edata_storage[eid]; } /** \brief Finalize the graph storage. * Construct the CSC, CSR, by sorting edges to maximize the * efficiency of graphlab. * This function takes O(|V|log(degree)) time and will * fail if there are any duplicate edges. */ void finalize(local_edge_buffer<VertexData, EdgeData> &edges) { #ifdef DEBUG_GRAPH logstream(LOG_DEBUG) << "Graph2 finalize starts." << std::endl; #endif std::vector<edge_id_type> permute; std::vector<edge_id_type> src_counting_prefix_sum; std::vector<edge_id_type> dest_counting_prefix_sum; #ifdef DEBUG_GRAPH logstream(LOG_DEBUG) << "Graph2 finalize: Sort by source vertex" << std::endl; #endif // Sort edges by source; // Begin of counting sort. counting_sort(edges.source_arr, permute, &src_counting_prefix_sum); // Inplace permute of edge_data, edge_src, edge_target array. 
#ifdef DEBUG_GRAPH logstream(LOG_DEBUG) << "Graph2 finalize: Inplace permute by source id" << std::endl; #endif lvid_type swap_src; lvid_type swap_target; EdgeData swap_data; for (size_t i = 0; i < permute.size(); ++i) { if (i != permute[i]) { // Reserve the ith entry; size_t j = i; swap_data = edges.data[i]; swap_src = edges.source_arr[i]; swap_target = edges.target_arr[i]; // Begin swap cycle: while (j != permute[j]) { size_t next = permute[j]; if (next != i) { edges.data[j] = edges.data[next]; edges.source_arr[j] = edges.source_arr[next]; edges.target_arr[j] = edges.target_arr[next]; permute[j] = j; j = next; } else { // end of cycle edges.data[j] = swap_data; edges.source_arr[j] = swap_src; edges.target_arr[j] = swap_target; permute[j] = j; break; } } } } // Unfortunately the zip iterator does not work as expected. Need more debugging... // typedef // boost::tuple< std::vector<lvid_type>::iterator, // std::vector<lvid_type>::iterator, // typename std::vector<EdgeData>::iterator> iterator_tuple; // typedef boost::zip_iterator<iterator_tuple> zip_iterator; // inplace_shuffle( zip_iterator(iterator_tuple( // edges.source_arr.begin(), // edges.target_arr.begin(), // edges.data.begin())), // zip_iterator(iterator_tuple( // edges.source_arr.end(), // edges.target_arr.end(), // edges.data.end())), // permute); #ifdef DEBUG_GRAPH logstream(LOG_DEBUG) << "Graph2 finalize: Sort by dest id" << std::endl; #endif counting_sort(edges.target_arr, permute, &dest_counting_prefix_sum); // Shuffle source array #ifdef DEBUG_GRAPH logstream(LOG_DEBUG) << "Graph2 finalize: Outofplace permute by dest id" << std::endl; #endif outofplace_shuffle(edges.source_arr, permute); // Use inplace shuffle to reduce peak memory footprint: // inplace_shuffle(edges.source_arr, permute); // counting_sort(edges.target_arr, permute); // warp into csr csc storage. 
_csr_storage.wrap(src_counting_prefix_sum, edges.target_arr); std::vector<std::pair<lvid_type, edge_id_type> > csc_value = vector_zip(edges.source_arr, permute); _csc_storage.wrap(dest_counting_prefix_sum, csc_value); _edata_storage.swap(edges.data); ASSERT_EQ(_csr_storage.num_values(), _csc_storage.num_values()); ASSERT_EQ(_csr_storage.num_values(), _edata_storage.size()); #ifdef DEBGU_GRAPH logstream(LOG_DEBUG) << "End of finalize." << std::endl; #endif } // end of finalize. /** \brief Reset the storage. */ void clear() { _csr_storage.clear(); _csc_storage.clear(); } size_t estimate_sizeof() const { return _csr_storage.estimate_sizeof() + _csc_storage.estimate_sizeof(); } // end of estimate_sizeof // ------------- Private data storage ---------------- private: csr_type _csr_storage; csc_type _csc_storage; std::vector<EdgeData> _edata_storage; public: /** \brief Load the graph from an archive */ void load(iarchive& arc) { clear(); arc >> _csr_storage >> _csc_storage >> _edata_storage; } /** \brief Save the graph to an archive */ void save(oarchive& arc) const { arc << _csr_storage << _csc_storage << _edata_storage; } /** swap two graph storage*/ void swap(graph_storage& other) { _csr_storage.swap(other._csr_storage); _csc_storage.swap(other._csc_storage); _edata_storage.swap(other._edata_storage); } };// End of graph store; }// End of namespace; namespace std { template<typename VertexData, typename EdgeData> inline void swap(graphlab::graph_storage<VertexData,EdgeData>& a, graphlab::graph_storage<VertexData,EdgeData>& b) { a.swap(b); } // end of swap }; // end of std namespace #include <graphlab/macros_undef.hpp> #endif ================================================ FILE: src/graphlab/graph/graph_vertex_join.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_GRAPH_JOIN_HPP
#define GRAPHLAB_GRAPH_JOIN_HPP
#include <utility>
#include <boost/unordered_map.hpp>
#include <graphlab/util/hopscotch_map.hpp>
#include <graphlab/graph/distributed_graph.hpp>
#include <graphlab/rpc/dc_dist_object.hpp>
namespace graphlab {

/**
 * \brief Provides the ability to pass information between vertices of two
 * different graphs
 *
 * \tparam LeftGraph Type of the left graph
 * \tparam RightGraph Type of the right graph
 * The graph_vertex_join class allows information to be passed between
 * vertices of two different graphs.
 *
 * Given two graphs <code>g1</code> and <code>g2</code>, possibly of different
 * types:
 *
 * \code
 * typedef distributed_graph<VData1, EData1> graph_1_type;
 * typedef distributed_graph<VData2, EData2> graph_2_type;
 * graph_1_type g1;
 * graph_2_type g2;
 * \endcode
 *
 * A graph_vertex_join object can be created:
 * \code
 * graph_vertex_join<graph_1_type, graph_2_type> vjoin(dc, g1, g2);
 * \endcode
 *
 * The first argument is the distributed control object. The second argument
 * shall be referred to as the graph on the "left" side of the join, and the
 * third argument shall be referred to as the graph on the "right" side of the
 * join.
 *
 * The join operates by having each vertex in both graphs emit an integer key.
 * Vertices with the same key are then combined into the same group.
The * semantics of the key depends on the join operation to be performed. * Right now, the only join operation supported is the Left Injective Join * and the Right Injective Join (see below). * * * * ## Left Injective Join * For the left injective join, vertices in the same graph must emit distinct * unique keys. i.e. Each vertex in <code>g1</code> must emit a key which is * different from all other vertices in <code>g1</code>. Vertices on the right * graph are then matched with vertices on the left graph with the same key. * The join operation is then allowed to modify vertices on the left graph * given the data on the vertices of the right graph. * * To emit the keys: * \code * vjoin.prepare_injective_join(left_emit_key, right_emit_key); * \endcode * left_emit_key and right_emit_key are functions (or lambda) with the following * prototype: * \code * size_t left_emit_key(const graph_1_type::vertex_type& vertex); * size_t right_emit_key(const graph_2_type::vertex_type& vertex); * \endcode * They essentially take as a constant argument, the vertex of their respective * graphs, and return an integer key. If the key has value (-1) it does not * participate in the join. * After keys are emitted and prepared with prepare_join, to perform a left * injective join: * \code * vjoin.left_injective_join(join_op); * \endcode * Where join_op is a function with the following prototype: * \code * void join_op(graph_1_type::vertex_type& left_vertex, * const graph_2_type::vertex_data_type right_vertex_data); * \endcode * Note the asymmetry in the arguments: the left vertex is passed as a * vertex_type, while for the right vertex, only the vertex data is accessible. * The function may make modifications on the left vertex. * * The left_injective_join() function must be called by all machines. * As a result, it may be used from within an engine's * \ref graphlab::iengine::add_vertex_aggregator aggregator's finalize * function. 
*
 * ### Left Injective Join Example
 * I have two graphs with identical structure. The left graph has data
 * \code
 * struct left_vertex_data {
 *   size_t user_id;
 *   std::string user_name;
 *   std::string email_address;
 *   // ... serializers omitted ...
 * }
 * typedef distributed_graph<left_vertex_data, some_edge_data> left_graph_type;
 * \endcode
 * However, when the left graph was constructed, there was no email address
 * information conveniently available, and thus it was left blank.
 *
 * And the right graph has vertex data:
 * \code
 * struct right_vertex_data {
 *   size_t user_id;
 *   std::string email_address;
 *   // ... serializers omitted ...
 * }
 * typedef distributed_graph<right_vertex_data, some_edge_data> right_graph_type;
 * \endcode
 * which was loaded from some other source, and contains all the email address
 * information.
 *
 * I create emit functions for both graphs:
 * \code
 * size_t emit_user_id_field_left(const left_graph_type::vertex_type& vtype) {
 *   return vtype.data().user_id;
 * }
 * size_t emit_user_id_field_right(const right_graph_type::vertex_type& vtype) {
 *   return vtype.data().user_id;
 * }
 * \endcode
 *
 * Create a join object and prepare the join:
 * \code
 * graph_vertex_join<left_graph_type, right_graph_type> vjoin(dc,
 *                                                            left_graph,
 *                                                            right_graph);
 * vjoin.prepare_injective_join(emit_user_id_field_left,
 *                              emit_user_id_field_right);
 * \endcode
 *
 * To copy the email address field from the right graph to the left graph:
 * \code
 * void join_email_address(left_graph_type::vertex_type& left_vertex,
 *                         const right_vertex_data& rvtx) {
 *   left_vertex.data().email_address = rvtx.email_address;
 * }
 *
 * vjoin.left_injective_join(join_email_address);
 * \endcode
 *
 * ## Right Injective Join
 * The right injective join is similar to the left injective join, but
 * with types reversed.
*/ template <typename LeftGraph, typename RightGraph> class graph_vertex_join { public: /// Type of the left graph typedef LeftGraph left_graph_type; /// Type of the right graph typedef RightGraph right_graph_type; /// Vertex Type of the left graph typedef typename right_graph_type::vertex_type left_vertex_type; /// Vertex Type of the right graph typedef typename left_graph_type::vertex_type right_vertex_type; /// Local Vertex Type of the left graph typedef typename right_graph_type::local_vertex_type left_local_vertex_type; /// Local Vertex Type of the right graph typedef typename left_graph_type::local_vertex_type right_local_vertex_type; /// Vertex Data Type of the left graph typedef typename right_graph_type::vertex_data_type left_data_type; /// Vertex Data Type of the right graph typedef typename left_graph_type::vertex_data_type right_data_type; dc_dist_object<graph_vertex_join<LeftGraph, RightGraph> > rmi; private: /// Reference to the left graph left_graph_type& left_graph; /// Reference to the right graph right_graph_type& right_graph; struct injective_join_index { std::vector<size_t> vtx_to_key; hopscotch_map<size_t, vertex_id_type> key_to_vtx; // we use -1 here to indicate that the vertex is not participating std::vector<procid_t> opposing_join_proc; }; injective_join_index left_inj_index, right_inj_index; public: graph_vertex_join(distributed_control& dc, left_graph_type& left, right_graph_type& right): rmi(dc, this), left_graph(left), right_graph(right) { } /** * \brief Initializes the join by associating each vertex with a key * * \tparam LeftEmitKey Type of the left_emit_key parameter. It should * not be necessary to specify this. C++ type inference should be able * to infer this automatically. * \tparam RightEmitKey Type of the right_emit_key parameter. It should * not be necessary to specify this. C++ type inference should be able * to infer this automatically. 
   *
   * \param left_emit_key A function which takes a vertex_type from the
   *   left graph and emits an integral key value. Can be a lambda, of the
   *   prototype: size_t left_emit_key(const LeftGraph::vertex_type& vertex);
   * \param right_emit_key A function which takes a vertex_type from the
   *   right graph and emits an integral key value. Can be a lambda, of the
   *   prototype: size_t right_emit_key(const RightGraph::vertex_type& vertex);
   *
   * The semantics of the key depend on the actual join operation performed.
   * This function must be called by all machines.
   *
   * left_emit_key and right_emit_key are functions (or lambda) with the
   * following prototype:
   * \code
   * size_t left_emit_key(const graph_1_type::vertex_type& vertex);
   * size_t right_emit_key(const graph_2_type::vertex_type& vertex);
   * \endcode
   * They essentially take as a constant argument, the vertex of their
   * respective graphs, and return an integer key. If a vertex emits the key
   * (size_t)(-1) it does not participate in the join.
   *
   * prepare_injective_join() only needs to be called once. After which an
   * arbitrary number of left_injective_join() and right_injective_join()
   * calls may be made.
   *
   * If after a join, a new join is to be performed on the same graph using
   * new data, or new emit functions, prepare_injective_join() can be called
   * again to recompute the join.
   */
  template <typename LeftEmitKey, typename RightEmitKey>
  void prepare_injective_join(LeftEmitKey left_emit_key,
                              RightEmitKey right_emit_key) {
    typedef std::pair<size_t, vertex_id_type> key_vertex_pair;
    // Basically, what we are trying to do is to figure out, for each vertex
    // on one side of the graph, which vertices of the other graph
    // (and on which machines) emitted the same key.
    //
    // The target datastructure is:
    //   vtx_to_key[vtx]:         The key for each vertex
    //   opposing_join_proc[vtx]: Machines which hold a vertex on the opposing
    //                            graph which emitted the same key
    //   key_to_vtx[key]:         Mapping of keys to vertices.

    // Build (or rebuild) the key index for each side of the join.
    reset_and_fill_injective_index(left_inj_index,
                                   left_graph,
                                   left_emit_key, "left graph");
    reset_and_fill_injective_index(right_inj_index,
                                   right_graph,
                                   right_emit_key, "right graph");
    rmi.barrier();
    // now, we need cross join across all machines to figure out the
    // opposing join proc. compute_injective_join() handles both directions
    // (left and right) in one pass.
    compute_injective_join();
  }

  /**
   * \brief Performs an injective join from the right graph to the left graph.
   *
   * \tparam JoinOp The type of the joinop function. It should
   * not be necessary to specify this. C++ type inference should be able
   * to infer this automatically.
   *
   * \param join_op The joining function. May be a function pointer or a
   * lambda matching the prototype
   * void join_op(LeftGraph::vertex_type& left_vertex,
   *              const RightGraph::vertex_data_type right_vertex_data);
   *
   * prepare_injective_join() must be called before hand.
   * All machines must call this function. join_op will be called on each
   * left vertex with the data on a right vertex which emitted the same key
   * in prepare_injective_join(). The join_op function is allowed to modify
   * the vertex data on the left graph.
   */
  template <typename JoinOp>
  void left_injective_join(JoinOp join_op) {
    // target = left, source = right: right vertex data flows to left vertices
    injective_join(left_inj_index, left_graph,
                   right_inj_index, right_graph,
                   join_op);
  }

  /**
   * \brief Performs an injective join from the left graph to the right graph.
   *
   * \tparam JoinOp The type of the joinop function. It should
   * not be necessary to specify this. C++ type inference should be able
   * to infer this automatically.
   *
   * \param join_op The joining function. May be a function pointer or a
   * lambda matching the prototype
   * void join_op(RightGraph::vertex_type& right_vertex,
   *              const LeftGraph::vertex_data_type left_vertex_data);
   *
   * prepare_injective_join() must be called before hand.
   * All machines must call this function.
   * join_op will be called on each
   * right vertex with the data on a left vertex which emitted the same key
   * in prepare_injective_join(). The join_op function is allowed to modify
   * the vertex data on the right graph.
   */
  template <typename JoinOp>
  void right_injective_join(JoinOp join_op) {
    // target = right, source = left: left vertex data flows to right vertices
    injective_join(right_inj_index, right_graph,
                   left_inj_index, left_graph,
                   join_op);
  }

 private:
  /**
   * Clears and rebuilds the injective join index for one side of the join.
   * Each *owned* local vertex is asked for its key via emit_key; the key is
   * recorded in idx.vtx_to_key and reverse-mapped in idx.key_to_vtx.
   * Vertices emitting (size_t)(-1) are skipped (do not participate).
   * Throws (and logs, tagged with \p message) on a duplicate key, since
   * keys must be unique within a graph for an injective join.
   */
  template <typename Graph, typename EmitKey>
  void reset_and_fill_injective_index(injective_join_index& idx,
                                      Graph& graph,
                                      EmitKey& emit_key,
                                      const char* message) {
    // clear the data
    idx.vtx_to_key.resize(graph.num_local_vertices());
    idx.key_to_vtx.clear();
    // (procid_t)(-1) marks "no opposing match found yet"
    idx.opposing_join_proc.resize(graph.num_local_vertices(), (procid_t)(-1));
    // loop through vertices, get the key and fill vtx_to_key and key_to_vtx
    for (lvid_type v = 0; v < graph.num_local_vertices(); ++v) {
      typename Graph::local_vertex_type lv = graph.l_vertex(v);
      if (lv.owned()) {
        typename Graph::vertex_type vtx(lv);
        size_t key = emit_key(vtx);
        idx.vtx_to_key[v] = key;
        if (key != (size_t)(-1)) {
          if (idx.key_to_vtx.count(key) > 0) {
            logstream(LOG_ERROR) << "Duplicate key in " << message << std::endl;
            logstream(LOG_ERROR) << "Duplicate keys not permitted" << std::endl;
            throw "Duplicate Key in Join";
          }
          idx.key_to_vtx.insert(std::make_pair(key, v));
        }
      }
    }
  }

  /**
   * Performs the distributed key matching. Keys are first scattered to
   * "controlling" machines (key % numprocs, see get_procs_with_keys), which
   * match left keys against right keys and then tell both owners, via
   * all_to_all, which machine holds the opposing vertex. Fills in
   * opposing_join_proc on both sides. Must be called by all machines.
   */
  void compute_injective_join() {
    std::vector<std::vector<size_t> > left_keys =
        get_procs_with_keys(left_inj_index.vtx_to_key, left_graph);
    std::vector<std::vector<size_t> > right_keys =
        get_procs_with_keys(right_inj_index.vtx_to_key, right_graph);
    // now. for each key on the right, I need to figure out which proc it
    // belongs in. and vice versa. This is actually kind of annoying.
    // but since it is one-to-one, I only need to make a hash map of one side.
    hopscotch_map<size_t, procid_t> left_key_to_procs;
    // construct a hash table of keys to procs
    // clear frequently to use less memory
    for (size_t p = 0; p < left_keys.size(); ++p) {
      for (size_t i = 0; i < left_keys[p].size(); ++i) {
        ASSERT_MSG(left_key_to_procs.count(left_keys[p][i]) == 0,
                   "Duplicate keys not permitted for left graph keys in injective join");
        left_key_to_procs.insert(std::make_pair(left_keys[p][i], p));
      }
      // swap-with-temporary releases the vector's capacity immediately
      std::vector<size_t>().swap(left_keys[p]);
    }
    left_keys.clear();

    // left_match[p]  : (key, right proc) pairs destined for left-owner p
    // right_match[p] : (key, left proc)  pairs destined for right-owner p
    std::vector<
        std::vector<
            std::pair<size_t, procid_t> > > left_match(rmi.numprocs());
    std::vector<
        std::vector<
            std::pair<size_t, procid_t> > > right_match(rmi.numprocs());
    // now for each key on the right, find the matching key on the left
    for (size_t p = 0; p < right_keys.size(); ++p) {
      for (size_t i = 0; i < right_keys[p].size(); ++i) {
        size_t key = right_keys[p][i];
        hopscotch_map<size_t, procid_t>::iterator iter =
            left_key_to_procs.find(key);
        if (iter != left_key_to_procs.end()) {
          ASSERT_MSG(iter->second != (procid_t)(-1),
                     "Duplicate keys not permitted for right graph keys in injective join");
          // we have a match
          procid_t left_proc = iter->second;
          procid_t right_proc = p;
          // now. left has to be told about right and right
          // has to be told about left
          left_match[left_proc].push_back(std::make_pair(key, right_proc));
          right_match[right_proc].push_back(std::make_pair(key, left_proc));
          // set the map entry to -1
          // so we know if it is ever reused
          iter->second = (procid_t)(-1);
        }
      }
      std::vector<size_t>().swap(right_keys[p]);
    }
    right_keys.clear();

    // route the match results back to the machines owning the vertices
    rmi.all_to_all(left_match);
    rmi.all_to_all(right_match);

    // fill in the index
    // go through the left match and set up the opposing index based
    // on the match result
#ifdef _OPENMP
#pragma omp parallel for
#endif
    for (size_t p = 0; p < left_match.size(); ++p) {
      for (size_t i = 0; i < left_match[p].size(); ++i) {
        // search for the key in the left index
        hopscotch_map<size_t, vertex_id_type>::const_iterator iter =
            left_inj_index.key_to_vtx.find(left_match[p][i].first);
        ASSERT_TRUE(iter != left_inj_index.key_to_vtx.end());
        // fill in the match
        left_inj_index.opposing_join_proc[iter->second] = left_match[p][i].second;
      }
    }
    left_match.clear();
    // repeat for the right match
#ifdef _OPENMP
#pragma omp parallel for
#endif
    for (size_t p = 0; p < right_match.size(); ++p) {
      for (size_t i = 0; i < right_match[p].size(); ++i) {
        // search for the key in the right index
        hopscotch_map<size_t, vertex_id_type>::const_iterator iter =
            right_inj_index.key_to_vtx.find(right_match[p][i].first);
        ASSERT_TRUE(iter != right_inj_index.key_to_vtx.end());
        // fill in the match
        right_inj_index.opposing_join_proc[iter->second] = right_match[p][i].second;
      }
    }
    right_match.clear();
    // ok done.
  }

  // each key is assigned to a controlling machine, who receives
  // the partial list of keys every other machine owns.
template <typename Graph> std::vector<std::vector<size_t> > get_procs_with_keys(const std::vector<size_t>& local_key_list, Graph& g) { // this machine will get all keys from each processor where // key = procid mod numprocs std::vector<std::vector<size_t> > procs_with_keys(rmi.numprocs()); for (size_t i = 0; i < local_key_list.size(); ++i) { if (g.l_vertex(i).owned() && local_key_list[i] != (size_t)(-1)) { procid_t target_procid = local_key_list[i] % rmi.numprocs(); procs_with_keys[target_procid].push_back(local_key_list[i]); } } rmi.all_to_all(procs_with_keys); return procs_with_keys; } template <typename TargetGraph, typename SourceGraph, typename JoinOp> void injective_join(injective_join_index& target, TargetGraph& target_graph, injective_join_index& source, SourceGraph& source_graph, JoinOp joinop) { // build up the exchange structure. // move right vertex data to left std::vector< std::vector< std::pair<size_t, typename SourceGraph::vertex_data_type> > > source_data(rmi.numprocs()); for (size_t i = 0; i < source.opposing_join_proc.size(); ++i) { if (source_graph.l_vertex(i).owned()) { procid_t target_proc = source.opposing_join_proc[i]; if (target_proc >= 0 && target_proc < rmi.numprocs()) { source_data[target_proc].push_back( std::make_pair(source.vtx_to_key[i], source_graph.l_vertex(i).data())); } } } // exchange rmi.all_to_all(source_data); // ok. now join against left #ifdef _OPENMP #pragma omp parallel for #endif for (size_t p = 0;p < source_data.size(); ++p) { for (size_t i = 0;i < source_data[p].size(); ++i) { // find the target vertex with the matching key hopscotch_map<size_t, vertex_id_type>::const_iterator iter = target.key_to_vtx.find(source_data[p][i].first); ASSERT_TRUE(iter != target.key_to_vtx.end()); // found it! 
typename TargetGraph::local_vertex_type lvtx = target_graph.l_vertex(iter->second); typename TargetGraph::vertex_type vtx(lvtx); joinop(vtx, source_data[p][i].second); } } target_graph.synchronize(); } }; } // namespace graphlab #endif ================================================ FILE: src/graphlab/graph/ingress/distributed_batch_ingress.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_DISTRIBUTED_BATCH_INGRESS_HPP #define GRAPHLAB_DISTRIBUTED_BATCH_INGRESS_HPP #include <boost/unordered_set.hpp> #include <graphlab/graph/graph_basic_types.hpp> #include <graphlab/graph/graph_hash.hpp> #include <graphlab/graph/ingress/distributed_ingress_base.hpp> #include <graphlab/graph/distributed_graph.hpp> #include <graphlab/rpc/buffered_exchange.hpp> #include <graphlab/rpc/distributed_event_log.hpp> #include <graphlab/util/dense_bitset.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { template<typename VertexData, typename EdgeData> class distributed_graph; template<typename VertexData, typename EdgeData> class distributed_batch_ingress : public distributed_ingress_base<VertexData, EdgeData> { public: typedef distributed_graph<VertexData, EdgeData> graph_type; /// The type of the vertex data stored in the graph typedef VertexData vertex_data_type; /// The type of the edge data stored in the graph typedef EdgeData edge_data_type; typedef typename graph_type::vertex_record vertex_record; typedef typename graph_type::mirror_type mirror_type; dc_dist_object<distributed_batch_ingress> rpc; typedef distributed_ingress_base<VertexData, EdgeData> base_type; mutex local_graph_lock; mutex lvid2record_lock; typedef fixed_dense_bitset<RPC_MAX_N_PROCS> bin_counts_type; /** Type of the degree hash table: * a map from vertex id to a bitset of length num_procs. */ typedef typename boost::unordered_map<vertex_id_type, bin_counts_type> dht_degree_table_type; /** distributed hash table stored on local machine */ boost::unordered_map<vertex_id_type, bin_counts_type > dht_degree_table; /** The map from vertex id to its DHT entry. * Must be called with a readlock acquired on dht_degree_table_lock. 
*/ size_t vid_to_dht_entry_with_readlock(vertex_id_type vid) { if (dht_degree_table.count(vid) == 0) { dht_degree_table_lock.unlock(); dht_degree_table_lock.writelock(); dht_degree_table[vid].clear(); dht_degree_table_lock.unlock(); dht_degree_table_lock.readlock(); } return vid; } rwlock dht_degree_table_lock; /** Local minibatch buffer */ /** Number of edges in the current buffer. */ size_t num_edges; /** Capacity of the local buffer. */ size_t bufsize; std::vector<std::pair<vertex_id_type, vertex_id_type> > edgesend; std::vector<EdgeData> edatasend; mutex edgesend_lock; /** A set of vertex in the current batch requiring query the DHT. */ std::vector<boost::unordered_set<vertex_id_type> > query_set; /** The map from proc_id to num_edges on that proc */ std::vector<size_t> proc_num_edges; DECLARE_TRACER(batch_ingress_add_edge); DECLARE_TRACER(batch_ingress_add_edges); DECLARE_TRACER(batch_ingress_compute_assignments); DECLARE_TRACER(batch_ingress_request_degree_table); DECLARE_TRACER(batch_ingress_get_degree_table); DECLARE_TRACER(batch_ingress_update_degree_table); /** Ingress tratis. 
   */
  bool usehash;
  bool userecent;

 public:
  /**
   * Constructs the batch ingress object.
   * \param dc distributed control object
   * \param graph the graph being ingested
   * \param bufsize capacity of the local edge buffer; a flush is triggered
   *        once this many edges are buffered (default 50000)
   * \param usehash forwarded to the greedy edge placement heuristic
   * \param userecent forwarded to the greedy edge placement heuristic
   */
  distributed_batch_ingress(distributed_control& dc, graph_type& graph,
                            size_t bufsize = 50000, bool usehash = false,
                            bool userecent = false) :
      base_type(dc, graph), rpc(dc, this),
      num_edges(0), bufsize(bufsize), query_set(dc.numprocs()),
      proc_num_edges(dc.numprocs()), usehash(usehash), userecent(userecent) {
    rpc.barrier();
    INITIALIZE_TRACER(batch_ingress_add_edge, "Time spent in add edge");
    INITIALIZE_TRACER(batch_ingress_add_edges, "Time spent in add block edges" );
    INITIALIZE_TRACER(batch_ingress_compute_assignments, "Time spent in compute assignment");
    INITIALIZE_TRACER(batch_ingress_request_degree_table, "Time spent in requesting assignment");
    INITIALIZE_TRACER(batch_ingress_get_degree_table, "Time spent in retrieve degree table");
    INITIALIZE_TRACER(batch_ingress_update_degree_table, "Time spent in update degree table");
  }

  /** Adds an edge to the batch ingress buffer, and updates the query set.
   *  May trigger a flush() once the buffer reaches capacity. */
  void add_edge(vertex_id_type source, vertex_id_type target,
                const EdgeData& edata) {
    BEGIN_TRACEPOINT(batch_ingress_add_edge);
    edgesend_lock.lock();
    ASSERT_LT(edgesend.size(), bufsize);
    edgesend.push_back(std::make_pair(source, target));
    edatasend.push_back(edata);
    // both endpoints' degree entries live on their hash-assigned machines;
    // record them so assign_edges() can query the DHT for this batch
    query_set[graph_hash::hash_vertex(source) % rpc.numprocs()].insert(source);
    query_set[graph_hash::hash_vertex(target) % rpc.numprocs()].insert(target);
    ++num_edges;
    edgesend_lock.unlock();
    END_TRACEPOINT(batch_ingress_add_edge);
    // flush() outside the lock: assign_edges() (called by flush) takes
    // edgesend_lock itself
    if (is_full()) flush();
  } // end of add_edge

  /** Flush the buffer and call base finalize. */;
  void finalize() {
    rpc.full_barrier();
    flush();
    rpc.full_barrier();
    base_type::finalize();
  } // end of finalize

 private:
  // HELPER ROUTINES =======================================================>
  /** Add edges in block to the local current graph.
   */
  void add_edges(const std::vector<vertex_id_type>& source_arr,
                 const std::vector<vertex_id_type>& target_arr,
                 const std::vector<EdgeData>& edata_arr) {
    BEGIN_TRACEPOINT(batch_ingress_add_edges);
    // the three arrays are parallel: one entry per edge
    ASSERT_TRUE((source_arr.size() == target_arr.size())
                && (source_arr.size() == edata_arr.size()));
    if (source_arr.size() == 0) return;

    std::vector<lvid_type> local_source_arr;
    local_source_arr.reserve(source_arr.size());
    std::vector<lvid_type> local_target_arr;
    local_target_arr.reserve(source_arr.size());

    // The map from vertex_id to its degree on this proc.
    std::vector<std::vector<vertex_id_type> > local_degree_count(rpc.numprocs());

    lvid_type max_lvid = 0;

    lvid2record_lock.lock();
    // Lock and update the lvid2record: translate every global vid into a
    // local vid, creating a new record for vertices seen for the first time.
    for (size_t i = 0; i < source_arr.size(); ++i) {
      vertex_id_type source = source_arr[i];
      vertex_id_type target = target_arr[i];
      lvid_type lvid_source(-1);
      lvid_type lvid_target(-1);
      // typedef typename boost::unordered_map<vertex_id_type, lvid_type>::iterator
      //   vid2lvid_iter;
      typedef typename hopscotch_map<vertex_id_type, lvid_type>::iterator
        vid2lvid_iter;
      vid2lvid_iter iter;

      iter = base_type::graph.vid2lvid.find(source);
      if (iter == base_type::graph.vid2lvid.end()) {
        // new vertex: next free local id
        lvid_source = base_type::graph.vid2lvid.size();
        base_type::graph.vid2lvid[source]=lvid_source;
        base_type::graph.lvid2record.push_back(vertex_record(source));
      } else {
        lvid_source = iter->second;
      }

      iter = base_type::graph.vid2lvid.find(target);
      if (iter == base_type::graph.vid2lvid.end()) {
        lvid_target = base_type::graph.vid2lvid.size();
        base_type::graph.vid2lvid[target]=lvid_target;
        base_type::graph.lvid2record.push_back(vertex_record(target));
      } else {
        lvid_target = iter->second;
      }

      local_source_arr.push_back(lvid_source);
      local_target_arr.push_back(lvid_target);
      max_lvid = std::max(std::max(lvid_source, lvid_target), max_lvid);

      // notify each endpoint's DHT owner that this proc now touches it
      local_degree_count[graph_hash::hash_vertex(source) %
                         rpc.numprocs()].push_back(source);
      local_degree_count[graph_hash::hash_vertex(target) %
                         rpc.numprocs()].push_back(target);
    }
    lvid2record_lock.unlock();

    // Send out local_degree count.
    for (size_t i = 0; i < rpc.numprocs(); ++i) {
      if (i != rpc.procid()) {
        rpc.remote_call(i,
                        &distributed_batch_ingress::block_add_degree_counts,
                        rpc.procid(),
                        local_degree_count[i]);
      } else {
        block_add_degree_counts(rpc.procid(), local_degree_count[i]);
      }
      local_degree_count[i].clear();
    }

    // Lock and add edges to local graph.
    local_graph_lock.lock();
    if (max_lvid >= base_type::graph.local_graph.num_vertices()) {
      //std::cout << rpc.procid() << ": " << max_lvid << std::endl;
      base_type::graph.local_graph.resize(max_lvid + 1);
    }
    base_type::graph.local_graph.add_edges(local_source_arr,
                                           local_target_arr, edata_arr);
    local_graph_lock.unlock();
    END_TRACEPOINT(batch_ingress_add_edges);
  } // end of add edges

  /** Updates the local part of the distributed table: sets bit \p pid for
   *  every vertex id in \p whohas. */
  void block_add_degree_counts (procid_t pid, std::vector<vertex_id_type>& whohas) {
    BEGIN_TRACEPOINT(batch_ingress_update_degree_table);
    dht_degree_table_lock.readlock();
    foreach (vertex_id_type& vid, whohas) {
      // vid_to_dht_entry_with_readlock may briefly upgrade to a write lock
      // to create a missing entry, then downgrade back to a read lock
      size_t idx = vid_to_dht_entry_with_readlock(vid);
      dht_degree_table[idx].set_bit_unsync(pid);
    }
    dht_degree_table_lock.unlock();
    END_TRACEPOINT(batch_ingress_update_degree_table);
  }

  /** Returns the degree counts by querying the distributed table. */
  dht_degree_table_type
  block_get_degree_table(const boost::unordered_set<vertex_id_type>& vid_query) {
    BEGIN_TRACEPOINT(batch_ingress_get_degree_table);
    dht_degree_table_type answer;
    dht_degree_table_lock.readlock();
    foreach (vertex_id_type qvid, vid_query) {
      answer[qvid] = dht_degree_table[vid_to_dht_entry_with_readlock(qvid)];
    }
    dht_degree_table_lock.unlock();
    END_TRACEPOINT(batch_ingress_get_degree_table);
    return answer;
  } // end of block get degree table

  /** Assign edges in the buffer greedily using the recent query of DHT.
   */
  void assign_edges(std::vector<std::vector<vertex_id_type> >& proc_src,
                    std::vector<std::vector<vertex_id_type> >& proc_dst,
                    std::vector<std::vector<EdgeData> >& proc_edata) {
    ASSERT_EQ(num_edges, edgesend.size());
    edgesend_lock.lock();
    if (num_edges == 0) {
      edgesend_lock.unlock();
      return;
    }

    BEGIN_TRACEPOINT(batch_ingress_request_degree_table);
    std::vector<dht_degree_table_type> degree_table(rpc.numprocs());
    // Query the DHT: fetch the degree bitsets for every vertex touched by
    // this batch from the machine that owns its entry.
    for (size_t i = 0; i < rpc.numprocs(); ++i) {
      if (i == rpc.procid()) {
        degree_table[i] = block_get_degree_table(query_set[i]);
      } else {
        degree_table[i] =
          rpc.remote_request(i,
                             &distributed_batch_ingress::block_get_degree_table,
                             query_set[i]);
      }
      query_set[i].clear();
    }
    END_TRACEPOINT(batch_ingress_request_degree_table);

    // Make assignment: greedily pick a destination proc per buffered edge
    // based on where its endpoints have already been placed.
    for (size_t i = 0; i < num_edges; ++i) {
      std::pair<vertex_id_type, vertex_id_type>& e = edgesend[i];
      BEGIN_TRACEPOINT(batch_ingress_compute_assignments);
      size_t src_proc = graph_hash::hash_vertex(e.first) % rpc.numprocs();
      size_t dst_proc = graph_hash::hash_vertex(e.second) % rpc.numprocs();
      bin_counts_type& src_degree = degree_table[src_proc][e.first];
      bin_counts_type& dst_degree = degree_table[dst_proc][e.second];
      procid_t proc = base_type::edge_decision.edge_to_proc_greedy(e.first, e.second,
          src_degree, dst_degree, proc_num_edges, usehash, userecent);
      END_TRACEPOINT(batch_ingress_compute_assignments);
      ASSERT_LT(proc, proc_src.size());
      proc_src[proc].push_back(e.first);
      proc_dst[proc].push_back(e.second);
      proc_edata[proc].push_back(edatasend[i]);
    }

    // Clear the sending buffer.
    edgesend.clear();
    edatasend.clear();
    edgesend_lock.unlock();
  } // end assign edge

  /** Flushes all edges in the buffer.
*/ void flush() { std::vector< std::vector<vertex_id_type> > proc_src(rpc.numprocs()); std::vector< std::vector<vertex_id_type> > proc_dst(rpc.numprocs()); std::vector< std::vector<EdgeData> > proc_edata(rpc.numprocs()); assign_edges(proc_src, proc_dst, proc_edata); for (size_t i = 0; i < proc_src.size(); ++i) { if (proc_src[i].size() == 0) continue; if (i == rpc.procid()) { add_edges(proc_src[i], proc_dst[i], proc_edata[i]); num_edges -= proc_src[i].size(); } else { rpc.remote_call(i, &distributed_batch_ingress::add_edges, proc_src[i], proc_dst[i], proc_edata[i]); num_edges -= proc_src[i].size(); } // end if } // end for } // end flush /** Returns the number of edges in the buffer. */ size_t size() { return num_edges; } /** Returns whether the buffer is full. */ bool is_full() { return size() >= bufsize; } }; // end of distributed_batch_ingress }; // end of namespace graphlab #include <graphlab/macros_undef.hpp> #endif ================================================ FILE: src/graphlab/graph/ingress/distributed_constrained_batch_ingress.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_DISTRIBUTED_CONSTRAINED_BATCH_INGRESS_HPP #define GRAPHLAB_DISTRIBUTED_CONSTRAINED_BATCH_INGRESS_HPP #include <boost/unordered_set.hpp> #include <graphlab/graph/distributed_graph.hpp> #include <graphlab/graph/graph_hash.hpp> #include <graphlab/graph/graph_basic_types.hpp> #include <graphlab/graph/ingress/distributed_ingress_base.hpp> #include <graphlab/rpc/buffered_exchange.hpp> #include <graphlab/rpc/distributed_event_log.hpp> #include <graphlab/util/dense_bitset.hpp> #include <graphlab/graph/ingress/sharding_constraint.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { template<typename VertexData, typename EdgeData> class distributed_graph; template<typename VertexData, typename EdgeData> class distributed_constrained_batch_ingress : public distributed_ingress_base<VertexData, EdgeData> { public: typedef distributed_graph<VertexData, EdgeData> graph_type; /// The type of the vertex data stored in the graph typedef VertexData vertex_data_type; /// The type of the edge data stored in the graph typedef EdgeData edge_data_type; typedef typename graph_type::vertex_record vertex_record; typedef typename graph_type::mirror_type mirror_type; dc_dist_object<distributed_constrained_batch_ingress> rpc; typedef distributed_ingress_base<VertexData, EdgeData> base_type; mutex local_graph_lock; mutex lvid2record_lock; typedef fixed_dense_bitset<RPC_MAX_N_PROCS> bin_counts_type; /** Type of the degree hash table: * a map from vertex id to a bitset of length num_procs. */ typedef typename boost::unordered_map<vertex_id_type, bin_counts_type> dht_degree_table_type; /** distributed hash table stored on local machine */ std::vector<bin_counts_type > dht_degree_table; /** The map from vertex id to its DHT entry. * Must be called with a readlock acquired on dht_degree_table_lock. 
*/ size_t vid_to_dht_entry_with_readlock(vertex_id_type vid) { size_t idx = (vid - rpc.procid()) / rpc.numprocs(); if (dht_degree_table.size() <= idx) { dht_degree_table_lock.unlock(); dht_degree_table_lock.writelock(); if (dht_degree_table.size() <= idx) { size_t newsize = std::max(dht_degree_table.size() * 2, idx + 1); dht_degree_table.resize(newsize); } dht_degree_table_lock.unlock(); dht_degree_table_lock.readlock(); } return idx; } rwlock dht_degree_table_lock; /** Local minibatch buffer */ /** Number of edges in the current buffer. */ size_t num_edges; /** Capacity of the local buffer. */ size_t bufsize; std::vector<std::pair<vertex_id_type, vertex_id_type> > edgesend; std::vector<EdgeData> edatasend; mutex edgesend_lock; /** A set of vertex in the current batch requiring query the DHT. */ std::vector<boost::unordered_set<vertex_id_type> > query_set; /** The map from proc_id to num_edges on that proc */ std::vector<size_t> proc_num_edges; /** Ingress tratis. */ bool usehash; bool userecent; sharding_constraint* constraint; boost::hash<vertex_id_type> hashvid; public: distributed_constrained_batch_ingress(distributed_control& dc, graph_type& graph, size_t bufsize = 50000, bool usehash = false, bool userecent = false) : base_type(dc, graph), rpc(dc, this), num_edges(0), bufsize(bufsize), query_set(dc.numprocs()), proc_num_edges(dc.numprocs()), usehash(usehash), userecent(userecent) { constraint = new sharding_constraint(dc.numprocs(), "grid"); rpc.barrier(); } ~distributed_constrained_batch_ingress() { delete constraint; } /** Adds an edge to the batch ingress buffer, and updates the query set. 
*/
  /** Buffer one edge locally and record which procs must be queried for the
   *  endpoint degree tables; flushes automatically when the buffer fills. */
  void add_edge(vertex_id_type source, vertex_id_type target,
                const EdgeData& edata) {
    edgesend_lock.lock();
    ASSERT_LT(edgesend.size(), bufsize);
    edgesend.push_back(std::make_pair(source, target));
    edatasend.push_back(edata);
    // Remember the DHT owner of each endpoint so assign_edges() can batch-query it.
    query_set[graph_hash::hash_vertex(source) % rpc.numprocs()].insert(source);
    query_set[graph_hash::hash_vertex(target) % rpc.numprocs()].insert(target);
    ++num_edges;
    edgesend_lock.unlock();
    if (is_full()) flush();
  } // end of add_edge

  /** Flush the buffer and call base finalize. */;  // NOTE(review): stray ';' retained; it is a harmless empty declaration.
  void finalize() {
    rpc.full_barrier();
    flush();
    rpc.full_barrier();
    base_type::finalize();
  } // end of finalize

 private:
  // HELPER ROUTINES =======================================================>

  /** Add edges in block to the local current graph.
   *  Translates global vids to local lvids (creating records for new vids),
   *  broadcasts per-proc degree counts, then appends the edges to the local graph. */
  void add_edges(const std::vector<vertex_id_type>& source_arr,
                 const std::vector<vertex_id_type>& target_arr,
                 const std::vector<EdgeData>& edata_arr) {
    ASSERT_TRUE((source_arr.size() == target_arr.size())
                && (source_arr.size() == edata_arr.size()));
    if (source_arr.size() == 0) return;
    std::vector<lvid_type> local_source_arr;
    local_source_arr.reserve(source_arr.size());
    std::vector<lvid_type> local_target_arr;
    local_target_arr.reserve(source_arr.size());
    // The map from vertex_id to its degree on this proc.
    std::vector<std::vector<vertex_id_type> > local_degree_count(rpc.numprocs());
    lvid_type max_lvid = 0;
    lvid2record_lock.lock();
    // Lock and update the lvid2record.
    for (size_t i = 0; i < source_arr.size(); ++i) {
      vertex_id_type source = source_arr[i];
      vertex_id_type target = target_arr[i];
      lvid_type lvid_source(-1);
      lvid_type lvid_target(-1);
      // typedef typename boost::unordered_map<vertex_id_type, lvid_type>::iterator
      // vid2lvid_iter;
      typedef typename cuckoo_map_pow2<vertex_id_type, lvid_type, 3, uint32_t>::iterator
          vid2lvid_iter;
      vid2lvid_iter iter;
      // Assign (or look up) the local id of the source vertex.
      iter = base_type::graph.vid2lvid.find(source);
      if (iter == base_type::graph.vid2lvid.end()) {
        lvid_source = base_type::graph.vid2lvid.size();
        base_type::graph.vid2lvid[source]=lvid_source;
        base_type::graph.lvid2record.push_back(vertex_record(source));
      } else {
        lvid_source = iter->second;
      }
      // Assign (or look up) the local id of the target vertex.
      iter = base_type::graph.vid2lvid.find(target);
      if (iter == base_type::graph.vid2lvid.end()) {
        lvid_target = base_type::graph.vid2lvid.size();
        base_type::graph.vid2lvid[target]=lvid_target;
        base_type::graph.lvid2record.push_back(vertex_record(target));
      } else {
        lvid_target = iter->second;
      }
      local_source_arr.push_back(lvid_source);
      local_target_arr.push_back(lvid_target);
      max_lvid = std::max(std::max(lvid_source, lvid_target), max_lvid);
      // Record that this proc now holds a replica of each endpoint, keyed by
      // the endpoint's DHT owner.
      local_degree_count[graph_hash::hash_vertex(source) % rpc.numprocs()].push_back(source);
      local_degree_count[graph_hash::hash_vertex(target) % rpc.numprocs()].push_back(target);
    }
    lvid2record_lock.unlock();
    // Send out local_degree count.
    for (size_t i = 0; i < rpc.numprocs(); ++i) {
      if (i != rpc.procid()) {
        rpc.remote_call(i,
                        &distributed_constrained_batch_ingress::block_add_degree_counts,
                        rpc.procid(),
                        local_degree_count[i]);
      } else {
        block_add_degree_counts(rpc.procid(), local_degree_count[i]);
      }
      local_degree_count[i].clear();
    }
    // Lock and add edges to local graph.
    local_graph_lock.lock();
    if (max_lvid >= base_type::graph.local_graph.num_vertices()) {
      //std::cout << rpc.procid() << ": " << max_lvid << std::endl;
      base_type::graph.local_graph.resize(max_lvid + 1);
    }
    base_type::graph.local_graph.add_edges(local_source_arr,
                                           local_target_arr, edata_arr);
    local_graph_lock.unlock();
  } // end of add edges

  /** Updates the local part of the distributed table.
   *  Marks, for every vid in `whohas`, that proc `pid` owns a replica of it.
   *  NOTE(review): entries are mutated via set_bit_unsync while holding only a
   *  read lock; presumably safe because concurrent bit-sets are idempotent and
   *  vid_to_dht_entry_with_readlock manages the resize case — confirm. */
  void block_add_degree_counts (procid_t pid, std::vector<vertex_id_type>& whohas) {
    dht_degree_table_lock.readlock();
    foreach (vertex_id_type& vid, whohas) {
      size_t idx = vid_to_dht_entry_with_readlock(vid);
      dht_degree_table[idx].set_bit_unsync(pid);
    }
    dht_degree_table_lock.unlock();
  }

  /** Returns the degree counts by querying the distributed table. */
  dht_degree_table_type
  block_get_degree_table(const boost::unordered_set<vertex_id_type>& vid_query) {
    dht_degree_table_type answer;
    dht_degree_table_lock.readlock();
    foreach (vertex_id_type qvid, vid_query) {
      answer[qvid] = dht_degree_table[vid_to_dht_entry_with_readlock(qvid)];
    }
    dht_degree_table_lock.unlock();
    return answer;
  } // end of block get degree table

  /** Assign edges in the buffer greedily using the recent query of DHT.
   *  Fills proc_src/proc_dst/proc_edata (one slot per proc) with the edges
   *  destined for each proc, then empties the send buffers. */
  void assign_edges(std::vector<std::vector<vertex_id_type> >& proc_src,
                    std::vector<std::vector<vertex_id_type> >& proc_dst,
                    std::vector<std::vector<EdgeData> >& proc_edata) {
    // NOTE(review): this assertion reads num_edges/edgesend before the lock is
    // taken below; racy if add_edge can run concurrently — confirm callers.
    ASSERT_EQ(num_edges, edgesend.size());
    edgesend_lock.lock();
    if (num_edges == 0) {
      edgesend_lock.unlock();
      return;
    }
    std::vector<dht_degree_table_type> degree_table(rpc.numprocs());
    // Query the DHT.
    for (size_t i = 0; i < rpc.numprocs(); ++i) {
      if (i == rpc.procid()) {
        degree_table[i] = block_get_degree_table(query_set[i]);
      } else {
        // Blocking request: waits for proc i's slice of the degree table.
        degree_table[i] = rpc.remote_request(i,
            &distributed_constrained_batch_ingress::block_get_degree_table,
            query_set[i]);
      }
      query_set[i].clear();
    }
    // Make assignment.
    for (size_t i = 0; i < num_edges; ++i) {
      std::pair<vertex_id_type, vertex_id_type>& e = edgesend[i];
      size_t src_proc = graph_hash::hash_vertex(e.first) % rpc.numprocs();
      size_t dst_proc = graph_hash::hash_vertex(e.second) % rpc.numprocs();
      bin_counts_type& src_degree = degree_table[src_proc][e.first];
      bin_counts_type& dst_degree = degree_table[dst_proc][e.second];
      // Candidates are restricted by the sharding constraint of the two masters.
      const std::vector<procid_t>& candidates =
          constraint->get_joint_neighbors(get_master(e.first), get_master(e.second));
      procid_t proc =
          base_type::edge_decision.edge_to_proc_greedy(e.first, e.second,
                                                       src_degree, dst_degree,
                                                       candidates, proc_num_edges,
                                                       usehash, userecent);
      ASSERT_LT(proc, proc_src.size());
      proc_src[proc].push_back(e.first);
      proc_dst[proc].push_back(e.second);
      proc_edata[proc].push_back(edatasend[i]);
    }
    // Clear the sending buffer.
    edgesend.clear();
    edatasend.clear();
    edgesend_lock.unlock();
  } // end assign edge

  /** Flushes all edges in the buffer.
   *  NOTE(review): num_edges is decremented here without holding edgesend_lock;
   *  confirm flush() is never concurrent with add_edge(). */
  void flush() {
    std::vector< std::vector<vertex_id_type> > proc_src(rpc.numprocs());
    std::vector< std::vector<vertex_id_type> > proc_dst(rpc.numprocs());
    std::vector< std::vector<EdgeData> > proc_edata(rpc.numprocs());
    assign_edges(proc_src, proc_dst, proc_edata);
    for (size_t i = 0; i < proc_src.size(); ++i) {
      if (proc_src[i].size() == 0)
        continue;
      if (i == rpc.procid()) {
        add_edges(proc_src[i], proc_dst[i], proc_edata[i]);
        num_edges -= proc_src[i].size();
      } else {
        rpc.remote_call(i, &distributed_constrained_batch_ingress::add_edges,
                        proc_src[i], proc_dst[i], proc_edata[i]);
        num_edges -= proc_src[i].size();
      } // end if
    } // end for
  } // end flush

  /** Returns the number of edges in the buffer. */
  size_t size() { return num_edges; }

  /** Returns whether the buffer is full. */
  bool is_full() { return size() >= bufsize; }

  /** Return the master shard of the vertex (hash of vid mod numprocs). */
  procid_t get_master (vertex_id_type vid) {
    return hashvid(vid) % base_type::rpc.numprocs();
  }
}; // end of distributed_constrained_batch_ingress

}; // end of namespace graphlab

#include <graphlab/macros_undef.hpp>

#endif


================================================
FILE: src/graphlab/graph/ingress/distributed_constrained_oblivious_ingress.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_DISTRIBUTED_CONSTRAINED_OBLIVIOUS_INGRESS_HPP
#define GRAPHLAB_DISTRIBUTED_CONSTRAINED_OBLIVIOUS_INGRESS_HPP

#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/graph/ingress/idistributed_ingress.hpp>
#include <graphlab/graph/ingress/distributed_ingress_base.hpp>
#include <graphlab/graph/ingress/ingress_edge_decision.hpp>
#include <graphlab/graph/distributed_graph.hpp>
#include <graphlab/rpc/buffered_exchange.hpp>
#include <graphlab/rpc/distributed_event_log.hpp>
#include <graphlab/util/dense_bitset.hpp>
#include <graphlab/util/cuckoo_map_pow2.hpp>
#include <graphlab/graph/ingress/sharding_constraint.hpp>

#include <graphlab/macros_def.hpp>
namespace graphlab {
  template<typename VertexData, typename EdgeData>
  class distributed_graph;

  /**
   * \brief Ingress object assigning edges greedily (oblivious heuristic)
   * within a sharding constraint.
*/
  template<typename VertexData, typename EdgeData>
  class distributed_constrained_oblivious_ingress:
      public distributed_ingress_base<VertexData, EdgeData> {
  public:
    typedef distributed_graph<VertexData, EdgeData> graph_type;
    /// The type of the vertex data stored in the graph
    typedef VertexData vertex_data_type;
    /// The type of the edge data stored in the graph
    typedef EdgeData edge_data_type;

    typedef typename graph_type::vertex_record vertex_record;
    typedef typename graph_type::mirror_type mirror_type;

    typedef distributed_ingress_base<VertexData, EdgeData> base_type;

    // typedef typename boost::unordered_map<vertex_id_type, std::vector<size_t> > degree_hash_table_type;
    typedef fixed_dense_bitset<RPC_MAX_N_PROCS> bin_counts_type;

    /** Type of the degree hash table:
     * a map from vertex id to a bitset of length num_procs. */
    typedef cuckoo_map_pow2<vertex_id_type, bin_counts_type,3,uint32_t> degree_hash_table_type;
    // Per-vertex bitset of procs that already hold a replica of the vertex.
    degree_hash_table_type dht;

    /** Array of number of edges on each proc. */
    std::vector<size_t> proc_num_edges;

    /** Ingress traits. */
    bool usehash;
    bool userecent;

    // Restricts the set of procs an edge may be placed on ("grid" layout).
    sharding_constraint* constraint;
    boost::hash<vertex_id_type> hashvid;

  public:
    distributed_constrained_oblivious_ingress(distributed_control& dc,
                                              graph_type& graph,
                                              bool usehash = false,
                                              bool userecent = false) :
      base_type(dc, graph),
      dht(-1),proc_num_edges(dc.numprocs()), usehash(usehash), userecent(userecent) {
      constraint = new sharding_constraint(dc.numprocs(), "grid");
     }

    ~distributed_constrained_oblivious_ingress() { delete constraint; }

    /** Add an edge to the ingress object using oblivious greedy assignment,
     *  restricted to the joint neighbors of the two endpoint masters. */
    void add_edge(vertex_id_type source, vertex_id_type target,
                  const EdgeData& edata) {
      // Force default-constructed entries to exist before taking references below.
      dht[source]; dht[target];
      const std::vector<procid_t>& candidates =
          constraint->get_joint_neighbors(get_master(source), get_master(target));
      const procid_t owning_proc =
          base_type::edge_decision.edge_to_proc_greedy(source, target,
                                                       dht[source], dht[target],
                                                       candidates, proc_num_edges,
                                                       usehash, userecent);
      typedef typename base_type::edge_buffer_record edge_buffer_record;
      edge_buffer_record record(source, target, edata);
      base_type::edge_exchange.send(owning_proc, record);
    } // end of add edge

    // Drop the replica table before the base finalization pass.
    virtual void finalize() {
      dht.clear();
      distributed_ingress_base<VertexData, EdgeData>::finalize();
    }

  private:
    /** Return the master shard of the vertex (hash of vid mod numprocs). */
    procid_t get_master (vertex_id_type vid) {
      return hashvid(vid) % base_type::rpc.numprocs();
    }
  }; // end of distributed_constrained_oblivious_ingress

}; // end of namespace graphlab
#include <graphlab/macros_undef.hpp>

#endif


================================================
FILE: src/graphlab/graph/ingress/distributed_constrained_random_ingress.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_DISTRIBUTED_CONSTRAINED_RANDOM_INGRESS_HPP
#define GRAPHLAB_DISTRIBUTED_CONSTRAINED_RANDOM_INGRESS_HPP

#include <boost/functional/hash.hpp>

#include <graphlab/rpc/buffered_exchange.hpp>
#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/graph/ingress/distributed_ingress_base.hpp>
#include <graphlab/graph/distributed_graph.hpp>
#include <graphlab/graph/ingress/sharding_constraint.hpp>
#include <graphlab/graph/ingress/ingress_edge_decision.hpp>

#include <graphlab/macros_def.hpp>
namespace graphlab {
  template<typename VertexData, typename EdgeData>
  class distributed_graph;

  /**
   * \brief Ingress object assigning edges using a random hash function,
   * restricted to the candidate procs allowed by a sharding constraint.
   */
  template<typename VertexData, typename EdgeData>
  class distributed_constrained_random_ingress :
    public distributed_ingress_base<VertexData, EdgeData> {
  public:
    typedef distributed_graph<VertexData, EdgeData> graph_type;
    /// The type of the vertex data stored in the graph
    typedef VertexData vertex_data_type;
    /// The type of the edge data stored in the graph
    typedef EdgeData edge_data_type;

    typedef distributed_ingress_base<VertexData, EdgeData> base_type;

    // Restricts the set of procs an edge may be placed on; layout chosen by `method`.
    sharding_constraint* constraint;
    boost::hash<vertex_id_type> hashvid;

  public:
    distributed_constrained_random_ingress(distributed_control& dc,
                                           graph_type& graph,
                                           const std::string& method) :
      base_type(dc, graph) {
      constraint = new sharding_constraint(dc.numprocs(), method);
    } // end of constructor

    ~distributed_constrained_random_ingress() {
      delete constraint;
    }

    /** Add an edge to the ingress object using random assignment among the
     *  joint neighbors of the two endpoint masters. */
    void add_edge(vertex_id_type source, vertex_id_type target,
                  const EdgeData& edata) {
      typedef typename base_type::edge_buffer_record edge_buffer_record;
      const std::vector<procid_t>& candidates =
          constraint->get_joint_neighbors(
              graph_hash::hash_vertex(source) % base_type::rpc.numprocs(),
              graph_hash::hash_vertex(target) % base_type::rpc.numprocs());
      const procid_t owning_proc =
          base_type::edge_decision.edge_to_proc_random(source, target, candidates);

      const edge_buffer_record record(source, target, edata);
#ifdef _OPENMP
      base_type::edge_exchange.send(owning_proc, record, omp_get_thread_num());
#else
      base_type::edge_exchange.send(owning_proc, record);
#endif
    } // end of add edge

  }; // end of distributed_constrained_random_ingress
}; // end of namespace graphlab
#include <graphlab/macros_undef.hpp>

#endif


================================================
FILE: src/graphlab/graph/ingress/distributed_hdrf_ingress.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_DISTRIBUTED_HDRF_INGRESS_HPP
#define GRAPHLAB_DISTRIBUTED_HDRF_INGRESS_HPP

#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/graph/ingress/distributed_ingress_base.hpp>
#include <graphlab/graph/ingress/ingress_edge_decision.hpp>
#include <graphlab/graph/distributed_graph.hpp>
#include <graphlab/rpc/buffered_exchange.hpp>
#include <graphlab/rpc/distributed_event_log.hpp>
#include <graphlab/util/dense_bitset.hpp>
#include <graphlab/util/cuckoo_map_pow2.hpp>

#include <graphlab/macros_def.hpp>
namespace graphlab {
  template<typename VertexData, typename EdgeData>
  class distributed_graph;

  /**
   * \brief Ingress object assigning edges with the HDRF
   * (High-Degree Replicated First) greedy heuristic.
   */
  template<typename VertexData, typename EdgeData>
  class distributed_hdrf_ingress:
      public distributed_ingress_base<VertexData, EdgeData> {
  public:
    typedef distributed_graph<VertexData, EdgeData> graph_type;
    /// The type of the vertex data stored in the graph
    typedef VertexData vertex_data_type;
    /// The type of the edge data stored in the graph
    typedef EdgeData edge_data_type;

    typedef typename graph_type::vertex_record vertex_record;
    typedef typename graph_type::mirror_type mirror_type;

    typedef distributed_ingress_base<VertexData, EdgeData> base_type;

    typedef fixed_dense_bitset<RPC_MAX_N_PROCS> bin_counts_type;

    /** Type of the replica degree hash table:
     * a map from vertex id to a bitset of length num_procs. */
    typedef cuckoo_map_pow2<vertex_id_type, bin_counts_type,3,uint32_t> degree_hash_table_type;
    // Per-vertex bitset of procs that already hold a replica of the vertex.
    degree_hash_table_type dht;

    /** Type of the vertex degree hash table:
     * a map from vertex id to its observed (partial) degree count. */
    typedef cuckoo_map_pow2<vertex_id_type, size_t,3,uint32_t> true_degree_hash_table_type;
    true_degree_hash_table_type degree_dht;

    /** Array of number of edges on each proc. */
    std::vector<size_t> proc_num_edges;

    /** Ingress traits. */
    bool usehash;
    bool userecent;

  public:
    distributed_hdrf_ingress(distributed_control& dc, graph_type& graph,
                             bool usehash = false, bool userecent = false) :
      base_type(dc, graph),
      dht(-1),degree_dht(-1),proc_num_edges(dc.numprocs()),
      usehash(usehash), userecent(userecent) {
      //INITIALIZE_TRACER(ob_ingress_compute_assignments, "Time spent in compute assignment");
     }

    ~distributed_hdrf_ingress() { }

    /** Add an edge to the ingress object using hdrf greedy assignment. */
    void add_edge(vertex_id_type source, vertex_id_type target,
                  const EdgeData& edata) {
      // Force default-constructed entries to exist before taking references below.
      dht[source]; dht[target];
      degree_dht[source]; degree_dht[target];
      const procid_t owning_proc =
          base_type::edge_decision.edge_to_proc_hdrf(source, target,
                                                     dht[source], dht[target],
                                                     degree_dht[source], degree_dht[target],
                                                     proc_num_edges,
                                                     usehash, userecent);
      typedef typename base_type::edge_buffer_record edge_buffer_record;
      edge_buffer_record record(source, target, edata);
      base_type::edge_exchange.send(owning_proc, record);
    } // end of add edge

    // Drop both hash tables, run base finalization, then report the total
    // number of edges assigned across all procs.
    virtual void finalize() {
      dht.clear();
      degree_dht.clear();
      distributed_ingress_base<VertexData, EdgeData>::finalize();
      size_t count = 0;
      for(std::vector<size_t>::iterator it = proc_num_edges.begin();
          it != proc_num_edges.end(); ++it) {
        count = count + *it;
      }
      logstream(LOG_EMPH) << "TOTAL PROCESSED ELEMENTS: " << count << std::endl;
    }

  }; // end of distributed_hdrf_ingress

}; // end of namespace graphlab
#include <graphlab/macros_undef.hpp>

#endif


================================================
FILE: src/graphlab/graph/ingress/distributed_identity_ingress.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_DISTRIBUTED_IDENTITY_INGRESS_HPP
#define GRAPHLAB_DISTRIBUTED_IDENTITY_INGRESS_HPP

#include <boost/functional/hash.hpp>

#include <graphlab/rpc/buffered_exchange.hpp>
#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/graph/ingress/distributed_ingress_base.hpp>
#include <graphlab/graph/distributed_graph.hpp>

#include <graphlab/macros_def.hpp>
namespace graphlab {
  template<typename VertexData, typename EdgeData>
  class distributed_graph;

  /**
   * \brief Ingress object assigning edges to the loading machine itself.
   */
  template<typename VertexData, typename EdgeData>
  class distributed_identity_ingress :
    public distributed_ingress_base<VertexData, EdgeData> {
  public:
    typedef distributed_graph<VertexData, EdgeData> graph_type;
    /// The type of the vertex data stored in the graph
    typedef VertexData vertex_data_type;
    /// The type of the edge data stored in the graph
    typedef EdgeData edge_data_type;

    typedef distributed_ingress_base<VertexData, EdgeData> base_type;

  public:
    distributed_identity_ingress(distributed_control& dc, graph_type& graph) :
      base_type(dc, graph) {
    } // end of constructor

    ~distributed_identity_ingress() { }

    /** Add an edge to the ingress object and assign the edge to the calling
     *  proc itself (no redistribution). */
    void add_edge(vertex_id_type source, vertex_id_type target,
                  const EdgeData& edata) {
      typedef typename base_type::edge_buffer_record edge_buffer_record;
      const procid_t owning_proc = base_type::rpc.procid();
      const edge_buffer_record record(source, target, edata);
      base_type::edge_exchange.send(owning_proc, record);
    } // end of add edge

  }; // end of distributed_identity_ingress
}; // end of namespace graphlab
#include <graphlab/macros_undef.hpp>

#endif


================================================
FILE: src/graphlab/graph/ingress/distributed_ingress_base.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_DISTRIBUTED_INGRESS_BASE_HPP
#define GRAPHLAB_DISTRIBUTED_INGRESS_BASE_HPP

#include <graphlab/graph/distributed_graph.hpp>
#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/graph/graph_hash.hpp>
#include <graphlab/graph/ingress/ingress_edge_decision.hpp>
#include <graphlab/graph/graph_gather_apply.hpp>
#include <graphlab/util/memory_info.hpp>
#include <graphlab/util/hopscotch_map.hpp>
#include <graphlab/rpc/buffered_exchange.hpp>

#include <graphlab/macros_def.hpp>
namespace graphlab {

  /**
   * \brief Implementation of the basic ingress functionality.
*/
  template <typename VertexType, typename EdgeType>
  class distributed_graph;

  template<typename VertexData, typename EdgeData>
  class distributed_ingress_base {
  public:
    typedef distributed_graph<VertexData, EdgeData> graph_type;
    /// The type of the vertex data stored in the graph
    typedef VertexData vertex_data_type;
    /// The type of the edge data stored in the graph
    typedef EdgeData edge_data_type;
    typedef typename graph_type::vertex_record vertex_record;
    typedef typename graph_type::mirror_type mirror_type;

    /// The rpc interface for this object
    dc_dist_object<distributed_ingress_base> rpc;
    /// The underlying distributed graph object that is being loaded
    graph_type& graph;

    /// Temporary buffers used to store vertex data on ingress
    struct vertex_buffer_record {
      vertex_id_type vid;
      vertex_data_type vdata;
      vertex_buffer_record(vertex_id_type vid = -1,
                           vertex_data_type vdata = vertex_data_type()) :
        vid(vid), vdata(vdata) { }
      // Serialization hooks used by the buffered exchange.
      void load(iarchive& arc) { arc >> vid >> vdata; }
      void save(oarchive& arc) const { arc << vid << vdata; }
    };
    buffered_exchange<vertex_buffer_record> vertex_exchange;

    /// Temporary buffers used to store edge data on ingress
    struct edge_buffer_record {
      vertex_id_type source, target;
      edge_data_type edata;
      edge_buffer_record(const vertex_id_type& source = vertex_id_type(-1),
                         const vertex_id_type& target = vertex_id_type(-1),
                         const edge_data_type& edata = edge_data_type()) :
        source(source), target(target), edata(edata) { }
      // Serialization hooks used by the buffered exchange.
      void load(iarchive& arc) { arc >> source >> target >> edata; }
      void save(oarchive& arc) const { arc << source << target << edata; }
    };
    buffered_exchange<edge_buffer_record> edge_exchange;

    /// Detail vertex record for the second pass coordination.
    struct vertex_negotiator_record {
      mirror_type mirrors;
      vertex_id_type num_in_edges, num_out_edges;
      bool has_data;
      vertex_data_type vdata;
      vertex_negotiator_record() :
        num_in_edges(0), num_out_edges(0), has_data(false) { }
      void load(iarchive& arc) {
        arc >> num_in_edges >> num_out_edges >> mirrors >> has_data >> vdata;
      }
      void save(oarchive& arc) const {
        arc << num_in_edges << num_out_edges << mirrors << has_data << vdata;
      }
      // Merge operator used by the gather phase: sums edge counts, unions the
      // mirror sets, and lets a record that carries data overwrite vdata.
      vertex_negotiator_record operator+=(const vertex_negotiator_record& v2) {
        num_in_edges += v2.num_in_edges;
        num_out_edges += v2.num_out_edges;
        mirrors |= v2.mirrors;
        if (v2.has_data) {
          vdata = v2.vdata;
        }
        return *this;
      }
    };

    /// Ingress decision object for computing the edge destination.
    ingress_edge_decision<VertexData, EdgeData> edge_decision;

  public:
    distributed_ingress_base(distributed_control& dc, graph_type& graph) :
      rpc(dc, this), graph(graph),
#ifdef _OPENMP
      vertex_exchange(dc, omp_get_max_threads()),
      edge_exchange(dc, omp_get_max_threads()),
#else
      vertex_exchange(dc), edge_exchange(dc),
#endif
      edge_decision(dc) {
      rpc.barrier();
    } // end of constructor

    virtual ~distributed_ingress_base() { }

    /** \brief Add an edge to the ingress object: pick the owner via random
     *  hashing and buffer the edge for that proc. */
    virtual void add_edge(vertex_id_type source, vertex_id_type target,
                          const EdgeData& edata) {
      const procid_t owning_proc =
        edge_decision.edge_to_proc_random(source, target, rpc.numprocs());
      const edge_buffer_record record(source, target, edata);
#ifdef _OPENMP
      edge_exchange.send(owning_proc, record, omp_get_thread_num());
#else
      edge_exchange.send(owning_proc, record);
#endif
    } // end of add edge

    /** \brief Add a vertex to the ingress object.
*/
    /** Buffer a vertex and its data for the proc that masters it (by hash). */
    virtual void add_vertex(vertex_id_type vid, const VertexData& vdata) {
      const procid_t owning_proc = graph_hash::hash_vertex(vid) % rpc.numprocs();
      const vertex_buffer_record record(vid, vdata);
#ifdef _OPENMP
      vertex_exchange.send(owning_proc, record, omp_get_thread_num());
#else
      vertex_exchange.send(owning_proc, record);
#endif
    } // end of add vertex

    /** Install a combiner invoked when data arrives for an existing vertex. */
    void set_duplicate_vertex_strategy(
        boost::function<void(vertex_data_type&, const vertex_data_type&)>
        combine_strategy) {
      vertex_combine_strategy = combine_strategy;
    }

    /** \brief Finalize completes the local graph data structure
     * and the vertex record information.
     *
     * \internal
     * The finalization goes through 5 steps:
     *
     * 1. Construct local graph using the received edges, during which
     * the vid2lvid map is built.
     *
     * 2. Construct lvid2record map (of empty entries) using the received vertices.
     *
     * 3. Complete lvid2record map by exchanging the vertex_info.
     *
     * 4. Exchange the negotiation records, including singletons. (Local graph
     * handling singletons).
     *
     * 5. Exchange global graph statistics.
     */
    virtual void finalize() {
      rpc.full_barrier();

      bool first_time_finalize = false;
      /**
       * Fast pass for first time finalization.
       */
      if (graph.is_dynamic()) {
        size_t nverts = graph.num_local_vertices();
        rpc.all_reduce(nverts);
        first_time_finalize = (nverts == 0);
      } else {
        first_time_finalize = false;
      }

      if (rpc.procid() == 0) {
        logstream(LOG_EMPH) << "Finalizing Graph..." << std::endl;
      }

      typedef typename hopscotch_map<vertex_id_type, lvid_type>::value_type
        vid2lvid_pair_type;
      typedef typename buffered_exchange<edge_buffer_record>::buffer_type
        edge_buffer_type;
      typedef typename buffered_exchange<vertex_buffer_record>::buffer_type
        vertex_buffer_type;

      /**
       * \internal
       * Buffer storage for new vertices to the local graph.
       */
      typedef typename graph_type::hopscotch_map_type vid2lvid_map_type;
      vid2lvid_map_type vid2lvid_buffer;

      /**
       * \internal
       * The beginning id assigned to the first new vertex.
       */
      const lvid_type lvid_start  = graph.vid2lvid.size();

      /**
       * \internal
       * Bit field indicating which vertices were updated during the ingress.
       */
      dense_bitset updated_lvids(graph.vid2lvid.size());

      /**************************************************************************/
      /*                                                                        */
      /*                       Flush any additional data                        */
      /*                                                                        */
      /**************************************************************************/
      edge_exchange.flush(); vertex_exchange.flush();

      /**
       * Fast pass for redundant finalization with no graph changes.
       */
      {
        size_t changed_size = edge_exchange.size() + vertex_exchange.size();
        rpc.all_reduce(changed_size);
        if (changed_size == 0) {
          logstream(LOG_INFO) << "Skipping Graph Finalization because no changes happened..." << std::endl;
          return;
        }
      }

      if(rpc.procid() == 0)
        memory_info::log_usage("Post Flush");

      /**************************************************************************/
      /*                                                                        */
      /*                         Construct local graph                          */
      /*                                                                        */
      /**************************************************************************/
      { // Add all the edges to the local graph
        logstream(LOG_INFO) << "Graph Finalize: constructing local graph" << std::endl;
        // NOTE(review): nedges already carries a +1, and reserve_edge_space
        // adds another +1 — one of these is presumably redundant; confirm.
        const size_t nedges = edge_exchange.size()+1;
        graph.local_graph.reserve_edge_space(nedges + 1);
        edge_buffer_type edge_buffer;
        procid_t proc;
        while(edge_exchange.recv(proc, edge_buffer)) {
          foreach(const edge_buffer_record& rec, edge_buffer) {
            // Get the source_lvid, creating a buffered entry for unseen vids.
            lvid_type source_lvid(-1);
            if(graph.vid2lvid.find(rec.source) == graph.vid2lvid.end()) {
              if (vid2lvid_buffer.find(rec.source) == vid2lvid_buffer.end()) {
                source_lvid = lvid_start + vid2lvid_buffer.size();
                vid2lvid_buffer[rec.source] = source_lvid;
              } else {
                source_lvid = vid2lvid_buffer[rec.source];
              }
            } else {
              source_lvid = graph.vid2lvid[rec.source];
              updated_lvids.set_bit(source_lvid);
            }
            // Get the target_lvid, same scheme as the source.
            lvid_type target_lvid(-1);
            if(graph.vid2lvid.find(rec.target) == graph.vid2lvid.end()) {
              if (vid2lvid_buffer.find(rec.target) == vid2lvid_buffer.end()) {
                target_lvid = lvid_start + vid2lvid_buffer.size();
                vid2lvid_buffer[rec.target] = target_lvid;
              } else {
                target_lvid = vid2lvid_buffer[rec.target];
              }
            } else {
              target_lvid = graph.vid2lvid[rec.target];
              updated_lvids.set_bit(target_lvid);
            }
            graph.local_graph.add_edge(source_lvid, target_lvid, rec.edata);
            // std::cout << "add edge " << rec.source << "\t" << rec.target << std::endl;
          } // end of loop over add edges
        } // end for loop over buffers
        edge_exchange.clear();

        ASSERT_EQ(graph.vid2lvid.size() + vid2lvid_buffer.size(),
                  graph.local_graph.num_vertices());
        if(rpc.procid() == 0)  {
          memory_info::log_usage("Finished populating local graph.");
        }

        // Finalize local graph
        logstream(LOG_INFO) << "Graph Finalize: finalizing local graph."
                            << std::endl;
        graph.local_graph.finalize();
        logstream(LOG_INFO) << "Local graph info: " << std::endl
                            << "\t nverts: " << graph.local_graph.num_vertices()
                            << std::endl
                            << "\t nedges: " << graph.local_graph.num_edges()
                            << std::endl;

        if(rpc.procid() == 0) {
          memory_info::log_usage("Finished finalizing local graph.");
          // debug
          // std::cout << graph.local_graph << std::endl;
        }
      }

      /**************************************************************************/
      /*                                                                        */
      /*             Receive and add vertex data to masters                     */
      /*                                                                        */
      /**************************************************************************/
      // Setup the map containing all the vertices being negotiated by this machine
      { // Receive any vertex data sent by other machines
        vertex_buffer_type vertex_buffer;
        procid_t sending_proc(-1);
        while(vertex_exchange.recv(sending_proc, vertex_buffer)) {
          foreach(const vertex_buffer_record& rec, vertex_buffer) {
            lvid_type lvid(-1);
            if (graph.vid2lvid.find(rec.vid) == graph.vid2lvid.end()) {
              if (vid2lvid_buffer.find(rec.vid) == vid2lvid_buffer.end()) {
                lvid = lvid_start + vid2lvid_buffer.size();
                vid2lvid_buffer[rec.vid] = lvid;
              } else {
                lvid = vid2lvid_buffer[rec.vid];
              }
            } else {
              lvid = graph.vid2lvid[rec.vid];
              updated_lvids.set_bit(lvid);
            }
            // Combine with existing data when a strategy is set and the vertex
            // already exists locally; otherwise just install the data.
            if (vertex_combine_strategy && lvid < graph.num_local_vertices()) {
              vertex_combine_strategy(graph.l_vertex(lvid).data(), rec.vdata);
            } else {
              graph.local_graph.add_vertex(lvid, rec.vdata);
            }
          }
        }
        vertex_exchange.clear();
        if(rpc.procid() == 0)
          memory_info::log_usage("Finished adding vertex data");
      } // end of loop to populate vrecmap

      /**************************************************************************/
      /*                                                                        */
      /*        assign vertex data and allocate vertex (meta)data space         */
      /*                                                                        */
      /**************************************************************************/
      { // Determine masters for all negotiated vertices
        const size_t local_nverts = graph.vid2lvid.size() + vid2lvid_buffer.size();
        graph.lvid2record.reserve(local_nverts);
        graph.lvid2record.resize(local_nverts);
        graph.local_graph.resize(local_nverts);
        foreach(const vid2lvid_pair_type& pair, vid2lvid_buffer) {
          vertex_record& vrec = graph.lvid2record[pair.second];
          vrec.gvid = pair.first;
          vrec.owner = graph_hash::hash_vertex(pair.first) % rpc.numprocs();
        }
        ASSERT_EQ(local_nverts, graph.local_graph.num_vertices());
        ASSERT_EQ(graph.lvid2record.size(), graph.local_graph.num_vertices());
        if(rpc.procid() == 0)
          memory_info::log_usage("Finihsed allocating lvid2record");
      }

      /**************************************************************************/
      /*                                                                        */
      /*                          Master handshake                              */
      /*                                                                        */
      /**************************************************************************/
      {
#ifdef _OPENMP
        buffered_exchange<vertex_id_type> vid_buffer(rpc.dc(), omp_get_max_threads());
#else
        buffered_exchange<vertex_id_type> vid_buffer(rpc.dc());
#endif

#ifdef _OPENMP
#pragma omp parallel for
#endif
        // send not owned vids to their master
        for (lvid_type i = lvid_start; i < graph.lvid2record.size(); ++i) {
          procid_t master = graph.lvid2record[i].owner;
          if (master != rpc.procid())
#ifdef _OPENMP
            vid_buffer.send(master, graph.lvid2record[i].gvid, omp_get_thread_num());
#else
            vid_buffer.send(master, graph.lvid2record[i].gvid);
#endif
        }
        vid_buffer.flush();
        rpc.barrier();

        // receive all vids owned by me
        mutex flying_vids_lock;
        // Vids mastered here that no local edge/vertex ever referenced
        // ("flying" vertices): they still need records and mirror sets.
        boost::unordered_map<vertex_id_type, mirror_type> flying_vids;
#ifdef _OPENMP
#pragma omp parallel
#endif
        {
          typename buffered_exchange<vertex_id_type>::buffer_type buffer;
          procid_t recvid;
          while(vid_buffer.recv(recvid, buffer)) {
            foreach(const vertex_id_type vid, buffer) {
              if (graph.vid2lvid.find(vid) == graph.vid2lvid.end()) {
                if (vid2lvid_buffer.find(vid) == vid2lvid_buffer.end()) {
                  flying_vids_lock.lock();
                  mirror_type& mirrors = flying_vids[vid];
                  flying_vids_lock.unlock();
                  mirrors.set_bit(recvid);
                } else {
                  lvid_type lvid = vid2lvid_buffer[vid];
                  graph.lvid2record[lvid]._mirrors.set_bit(recvid);
                }
              } else {
                lvid_type lvid = graph.vid2lvid[vid];
                graph.lvid2record[lvid]._mirrors.set_bit(recvid);
                updated_lvids.set_bit(lvid);
              }
            }
          }
        }

        vid_buffer.clear();
        // reallocate spaces for the flying vertices.
        size_t vsize_old = graph.lvid2record.size();
        size_t vsize_new = vsize_old + flying_vids.size();
        graph.lvid2record.resize(vsize_new);
        graph.local_graph.resize(vsize_new);
        for (typename boost::unordered_map<vertex_id_type, mirror_type>::iterator
               it = flying_vids.begin(); it != flying_vids.end(); ++it) {
          // vid2lvid_buffer grows on the last line of this loop, so each
          // iteration yields the next fresh lvid.
          lvid_type lvid = lvid_start + vid2lvid_buffer.size();
          vertex_id_type gvid = it->first;
          graph.lvid2record[lvid].owner = rpc.procid();
          graph.lvid2record[lvid].gvid = gvid;
          graph.lvid2record[lvid]._mirrors= it->second;
          vid2lvid_buffer[gvid] = lvid;
          // std::cout << "proc " << rpc.procid() << " recevies flying vertex " << gvid << std::endl;
        }
      } // end of master handshake

      /**************************************************************************/
      /*                                                                        */
      /*                        Merge in vid2lvid_buffer                        */
      /*                                                                        */
      /**************************************************************************/
      {
        if (graph.vid2lvid.size() == 0) {
          graph.vid2lvid.swap(vid2lvid_buffer);
        } else {
          graph.vid2lvid.rehash(graph.vid2lvid.size() + vid2lvid_buffer.size());
          foreach (const typename vid2lvid_map_type::value_type& pair, vid2lvid_buffer) {
            graph.vid2lvid.insert(pair);
          }
          vid2lvid_buffer.clear();
          // vid2lvid_buffer.swap(vid2lvid_map_type(-1));
        }
      }

      /**************************************************************************/
      /*                                                                        */
      /*              synchronize vertex data and meta information              */
      /*                                                                        */
      /**************************************************************************/
      {
        // construct the vertex set of changed vertices
        // Fast pass for first time finalize;
        vertex_set changed_vset(true);

        // Compute the vertices that needs synchronization
        if (!first_time_finalize) {
          // NOTE(review): this inner declaration SHADOWS the outer
          // changed_vset, so the restricted set built here is discarded and
          // vrecord_sync_gas.exec below always runs on the full vset —
          // confirm whether the shadowing is intentional before changing it.
          vertex_set changed_vset = vertex_set(false);
          changed_vset.make_explicit(graph);
          updated_lvids.resize(graph.num_local_vertices());
          for (lvid_type i = lvid_start; i <  graph.num_local_vertices(); ++i) {
            updated_lvids.set_bit(i);
          }
          changed_vset.localvset = updated_lvids;
          buffered_exchange<vertex_id_type> vset_exchange(rpc.dc());
          // sync vset with all mirrors
          changed_vset.synchronize_mirrors_to_master_or(graph, vset_exchange);
          changed_vset.synchronize_master_to_mirrors(graph, vset_exchange);
        }

        graphlab::graph_gather_apply<graph_type, vertex_negotiator_record>
          vrecord_sync_gas(graph,
                           boost::bind(&distributed_ingress_base::finalize_gather, this, _1, _2),
                           boost::bind(&distributed_ingress_base::finalize_apply, this, _1, _2, _3));
        vrecord_sync_gas.exec(changed_vset);

        if(rpc.procid() == 0)
          memory_info::log_usage("Finished synchronizing vertex (meta)data");
      }

      exchange_global_info();
    } // end of finalize

    /* Exchange graph statistics among all nodes and compute
     * global statistics for the distributed graph. */
    void exchange_global_info () {
      // Count the number of vertices owned locally
      graph.local_own_nverts = 0;
      foreach(const vertex_record& record, graph.lvid2record)
        if(record.owner == rpc.procid()) ++graph.local_own_nverts;

      // Finalize global graph statistics.
      logstream(LOG_INFO)
        << "Graph Finalize: exchange global statistics " << std::endl;

      // Compute edge counts
      std::vector<size_t> swap_counts(rpc.numprocs());
      swap_counts[rpc.procid()] = graph.num_local_edges();
      rpc.all_gather(swap_counts);
      graph.nedges = 0;
      foreach(size_t count, swap_counts) graph.nedges += count;

      // compute vertex count
      swap_counts[rpc.procid()] = graph.num_local_own_vertices();
      rpc.all_gather(swap_counts);
      graph.nverts = 0;
      foreach(size_t count, swap_counts) graph.nverts += count;

      // compute replicas
      swap_counts[rpc.procid()] = graph.num_local_vertices();
      rpc.all_gather(swap_counts);
      graph.nreplicas = 0;
      foreach(size_t count, swap_counts) graph.nreplicas += count;

      if (rpc.procid() == 0) {
        logstream(LOG_EMPH) << "Graph info: "
                            << "\n\t nverts: " << graph.num_vertices()
                            << "\n\t nedges: " << graph.num_edges()
                            << "\n\t nreplicas: " << graph.nreplicas
                            << "\n\t replication factor: " << (double)graph.nreplicas/graph.num_vertices()
                            << std::endl;
      }
    }

  private:
    // Optional user-supplied combiner for duplicate vertex data (may be empty).
    boost::function<void(vertex_data_type&, const vertex_data_type&)> vertex_combine_strategy;

    /**
     * \brief Gather the vertex distributed meta data.
     */
    vertex_negotiator_record finalize_gather(lvid_type& lvid, graph_type& graph) {
      vertex_negotiator_record accum;
      accum.num_in_edges = graph.local_graph.num_in_edges(lvid);
      accum.num_out_edges = graph.local_graph.num_out_edges(lvid);
      if (graph.l_is_master(lvid)) {
        accum.has_data = true;
        accum.vdata = graph.l_vertex(lvid).data();
        accum.mirrors = graph.lvid2record[lvid]._mirrors;
      }
      return accum;
    }

    /**
     * \brief Update the vertex datastructures with the gathered vertex metadata.
*/ void finalize_apply(lvid_type lvid, const vertex_negotiator_record& accum, graph_type& graph) { typename graph_type::vertex_record& vrec = graph.lvid2record[lvid]; vrec.num_in_edges = accum.num_in_edges; vrec.num_out_edges = accum.num_out_edges; graph.l_vertex(lvid).data() = accum.vdata; vrec._mirrors = accum.mirrors; } }; // end of distributed_ingress_base }; // end of namespace graphlab #include <graphlab/macros_undef.hpp> #endif ================================================ FILE: src/graphlab/graph/ingress/distributed_oblivious_ingress.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_DISTRIBUTED_OBLIVIOUS_INGRESS_HPP #define GRAPHLAB_DISTRIBUTED_OBLIVIOUS_INGRESS_HPP #include <graphlab/graph/graph_basic_types.hpp> #include <graphlab/graph/ingress/distributed_ingress_base.hpp> #include <graphlab/graph/ingress/ingress_edge_decision.hpp> #include <graphlab/graph/distributed_graph.hpp> #include <graphlab/rpc/buffered_exchange.hpp> #include <graphlab/rpc/distributed_event_log.hpp> #include <graphlab/util/dense_bitset.hpp> #include <graphlab/util/cuckoo_map_pow2.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { template<typename VertexData, typename EdgeData> class distributed_graph; /** * \brief Ingress object assigning edges using randoming hash function. */ template<typename VertexData, typename EdgeData> class distributed_oblivious_ingress: public distributed_ingress_base<VertexData, EdgeData> { public: typedef distributed_graph<VertexData, EdgeData> graph_type; /// The type of the vertex data stored in the graph typedef VertexData vertex_data_type; /// The type of the edge data stored in the graph typedef EdgeData edge_data_type; typedef typename graph_type::vertex_record vertex_record; typedef typename graph_type::mirror_type mirror_type; typedef distributed_ingress_base<VertexData, EdgeData> base_type; // typedef typename boost::unordered_map<vertex_id_type, std::vector<size_t> > degree_hash_table_type; typedef fixed_dense_bitset<RPC_MAX_N_PROCS> bin_counts_type; /** Type of the degree hash table: * a map from vertex id to a bitset of length num_procs. */ typedef cuckoo_map_pow2<vertex_id_type, bin_counts_type,3,uint32_t> degree_hash_table_type; degree_hash_table_type dht; /** Array of number of edges on each proc. */ std::vector<size_t> proc_num_edges; simple_spinlock obliv_lock; /** Ingress traits. 
*/ bool usehash; bool userecent; public: distributed_oblivious_ingress(distributed_control& dc, graph_type& graph, bool usehash = false, bool userecent = false) : base_type(dc, graph), dht(-1),proc_num_edges(dc.numprocs()), usehash(usehash), userecent(userecent) { //INITIALIZE_TRACER(ob_ingress_compute_assignments, "Time spent in compute assignment"); } ~distributed_oblivious_ingress() { } /** Add an edge to the ingress object using oblivious greedy assignment. */ void add_edge(vertex_id_type source, vertex_id_type target, const EdgeData& edata) { obliv_lock.lock(); dht[source]; dht[target]; const procid_t owning_proc = base_type::edge_decision.edge_to_proc_greedy(source, target, dht[source], dht[target], proc_num_edges, usehash, userecent); obliv_lock.unlock(); typedef typename base_type::edge_buffer_record edge_buffer_record; edge_buffer_record record(source, target, edata); #ifdef _OPENMP base_type::edge_exchange.send(owning_proc, record, omp_get_thread_num()); #else base_type::edge_exchange.send(owning_proc, record); #endif } // end of add edge virtual void finalize() { dht.clear(); distributed_ingress_base<VertexData, EdgeData>::finalize(); } }; // end of distributed_ob_ingress }; // end of namespace graphlab #include <graphlab/macros_undef.hpp> #endif ================================================ FILE: src/graphlab/graph/ingress/distributed_random_ingress.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. 
See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_DISTRIBUTED_RANDOM_INGRESS_HPP #define GRAPHLAB_DISTRIBUTED_RANDOM_INGRESS_HPP #include <boost/functional/hash.hpp> #include <graphlab/rpc/buffered_exchange.hpp> #include <graphlab/graph/graph_basic_types.hpp> #include <graphlab/graph/ingress/distributed_ingress_base.hpp> #include <graphlab/graph/distributed_graph.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { template<typename VertexData, typename EdgeData> class distributed_graph; /** * \brief Ingress object assigning edges using randoming hash function. */ template<typename VertexData, typename EdgeData> class distributed_random_ingress : public distributed_ingress_base<VertexData, EdgeData> { public: typedef distributed_graph<VertexData, EdgeData> graph_type; /// The type of the vertex data stored in the graph typedef VertexData vertex_data_type; /// The type of the edge data stored in the graph typedef EdgeData edge_data_type; typedef distributed_ingress_base<VertexData, EdgeData> base_type; public: distributed_random_ingress(distributed_control& dc, graph_type& graph) : base_type(dc, graph) { } // end of constructor ~distributed_random_ingress() { } /** Add an edge to the ingress object using random assignment. 
*/ void add_edge(vertex_id_type source, vertex_id_type target, const EdgeData& edata) { typedef typename base_type::edge_buffer_record edge_buffer_record; const procid_t owning_proc = base_type::edge_decision.edge_to_proc_random(source, target, base_type::rpc.numprocs()); const edge_buffer_record record(source, target, edata); base_type::edge_exchange.send(owning_proc, record); } // end of add edge }; // end of distributed_random_ingress }; // end of namespace graphlab #include <graphlab/macros_undef.hpp> #endif ================================================ FILE: src/graphlab/graph/ingress/ingress_edge_decision.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_DISTRIBUTED_INGRESS_EDGE_DECISION_HPP
#define GRAPHLAB_DISTRIBUTED_INGRESS_EDGE_DECISION_HPP

#include <graphlab/graph/distributed_graph.hpp>
#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/graph/graph_hash.hpp>
#include <graphlab/rpc/distributed_event_log.hpp>
#include <graphlab/util/dense_bitset.hpp>
#include <boost/random/uniform_int_distribution.hpp>

namespace graphlab {
  template<typename VertexData, typename EdgeData>
  class distributed_graph;

  /**
   * \brief Policy object that decides, at ingress time, which machine an
   * edge (source, target) is assigned to.  Every strategy canonicalizes
   * the pair to (min, max) and breaks ties by hashing the edge, so the
   * decision is deterministic for a given edge.
   */
  template<typename VertexData, typename EdgeData>
  class ingress_edge_decision {

  public:
    typedef graphlab::vertex_id_type vertex_id_type;
    typedef distributed_graph<VertexData, EdgeData> graph_type;
    // One bit per machine: on which machines a vertex has already been seen.
    typedef fixed_dense_bitset<RPC_MAX_N_PROCS> bin_counts_type;

  public:
    /** \brief A decision object for computing the edge assignment. */
    ingress_edge_decision(distributed_control& dc) { }

    /** Random assign (source, target) to a machine p in {0, ... numprocs-1} */
    procid_t edge_to_proc_random (const vertex_id_type source,
        const vertex_id_type target, size_t numprocs) {
      // Canonicalize so (u,v) and (v,u) hash to the same machine.
      typedef std::pair<vertex_id_type, vertex_id_type> edge_pair_type;
      const edge_pair_type edge_pair(std::min(source, target),
                                     std::max(source, target));
      return graph_hash::hash_edge(edge_pair) % (numprocs);
    };

    /** Random assign (source, target) to a machine p in a list of candidates */
    procid_t edge_to_proc_random (const vertex_id_type source,
        const vertex_id_type target,
        const std::vector<procid_t> & candidates) {
      typedef std::pair<vertex_id_type, vertex_id_type> edge_pair_type;
      const edge_pair_type edge_pair(std::min(source, target),
                                     std::max(source, target));
      return candidates[graph_hash::hash_edge(edge_pair) % (candidates.size())];
    };

    /** Greedy assign (source, target) to a machine using:
     * bitset<MAX_MACHINE> src_degree : the degree presence of source over machines
     * bitset<MAX_MACHINE> dst_degree : the degree presence of target over machines
     * vector<size_t> proc_num_edges : the edge counts over machines
     *
     * Each machine is scored as a load-balance term (in [0,1], highest for
     * the least-loaded machine) plus the number of the two endpoints it has
     * already seen; the edge is hashed onto one of the top-scoring machines
     * and the bitsets / edge counts are updated to reflect the placement.
     */
    procid_t edge_to_proc_greedy (const vertex_id_type source,
        const vertex_id_type target,
        bin_counts_type& src_degree,
        bin_counts_type& dst_degree,
        std::vector<size_t>& proc_num_edges,
        bool usehash = false,
        bool userecent = false) {
      size_t numprocs = proc_num_edges.size();

      // Compute the score of each proc.
      procid_t best_proc = -1;
      double maxscore = 0.0;
      // epsilon keeps the balance denominator nonzero when all machines
      // currently hold the same number of edges.
      double epsilon = 1.0;
      std::vector<double> proc_score(numprocs);
      size_t minedges = *std::min_element(proc_num_edges.begin(),
                                          proc_num_edges.end());
      size_t maxedges = *std::max_element(proc_num_edges.begin(),
                                          proc_num_edges.end());
      for (size_t i = 0; i < numprocs; ++i) {
        // usehash additionally credits the machine the raw vertex id maps to.
        size_t sd = src_degree.get(i) + (usehash && (source % numprocs == i));
        size_t td = dst_degree.get(i) + (usehash && (target % numprocs == i));
        double bal = (maxedges - proc_num_edges[i])/(epsilon + maxedges - minedges);
        proc_score[i] = bal + ((sd > 0) + (td > 0));
      }
      maxscore = *std::max_element(proc_score.begin(), proc_score.end());
      // Collect all machines numerically tied with the best score.
      std::vector<procid_t> top_procs;
      for (size_t i = 0; i < numprocs; ++i)
        if (std::fabs(proc_score[i] - maxscore) < 1e-5)
          top_procs.push_back(i);

      // Hash the edge to one of the best procs.
      typedef std::pair<vertex_id_type, vertex_id_type> edge_pair_type;
      const edge_pair_type edge_pair(std::min(source, target),
                                     std::max(source, target));
      best_proc = top_procs[graph_hash::hash_edge(edge_pair) % top_procs.size()];
      ASSERT_LT(best_proc, numprocs);

      // userecent: forget older placements, keep only the most recent one.
      if (userecent) {
        src_degree.clear();
        dst_degree.clear();
      }
      src_degree.set_bit(best_proc);
      dst_degree.set_bit(best_proc);
      ++proc_num_edges[best_proc];
      return best_proc;
    };

    /** Greedy assign (source, target) to a machine using:
     * bitset<MAX_MACHINE> src_degree : the degree presence of source over machines
     * bitset<MAX_MACHINE> dst_degree : the degree presence of target over machines
     * vector<size_t> proc_num_edges : the edge counts over machines
     *
     * Identical to the overload above, except that only the machines in
     * `candidates` are scored and eligible for placement.
     */
    procid_t edge_to_proc_greedy (const vertex_id_type source,
        const vertex_id_type target,
        bin_counts_type& src_degree,
        bin_counts_type& dst_degree,
        std::vector<procid_t>& candidates,
        std::vector<size_t>& proc_num_edges,
        bool usehash = false,
        bool userecent = false ) {
      size_t numprocs = proc_num_edges.size();

      // Compute the score of each proc.
      procid_t best_proc = -1;
      double maxscore = 0.0;
      double epsilon = 1.0;
      // Scores are indexed by candidate position j, not machine id.
      std::vector<double> proc_score(candidates.size());
      size_t minedges = *std::min_element(proc_num_edges.begin(),
                                          proc_num_edges.end());
      size_t maxedges = *std::max_element(proc_num_edges.begin(),
                                          proc_num_edges.end());
      for (size_t j = 0; j < candidates.size(); ++j) {
        size_t i = candidates[j];
        size_t sd = src_degree.get(i) + (usehash && (source % numprocs == i));
        size_t td = dst_degree.get(i) + (usehash && (target % numprocs == i));
        double bal = (maxedges - proc_num_edges[i])/(epsilon + maxedges - minedges);
        proc_score[j] = bal + ((sd > 0) + (td > 0));
      }
      maxscore = *std::max_element(proc_score.begin(), proc_score.end());
      std::vector<procid_t> top_procs;
      for (size_t j = 0; j < candidates.size(); ++j)
        if (std::fabs(proc_score[j] - maxscore) < 1e-5)
          top_procs.push_back(candidates[j]);

      // Hash the edge to one of the best procs.
      typedef std::pair<vertex_id_type, vertex_id_type> edge_pair_type;
      const edge_pair_type edge_pair(std::min(source, target),
                                     std::max(source, target));
      best_proc = top_procs[graph_hash::hash_edge(edge_pair) % top_procs.size()];
      ASSERT_LT(best_proc, numprocs);

      if (userecent) {
        src_degree.clear();
        dst_degree.clear();
      }
      src_degree.set_bit(best_proc);
      dst_degree.set_bit(best_proc);
      ++proc_num_edges[best_proc];
      return best_proc;
    };

    /** HDRF greedy assign (source, target) to a machine using:
     * bitset<MAX_MACHINE> src_degree : the degree presence of source over machines
     * bitset<MAX_MACHINE> dst_degree : the degree presence of target over machines
     * size_t src_true_degree : the degree of source vertex over machines
     * size_t dst_true_degree : the degree of target vertex over machines
     * vector<size_t> proc_num_edges : the edge counts over machines
     *
     * author : Fabio Petroni [www.fabiopetroni.com]
     *          Giorgio Iacoboni [g.iacoboni@gmail.com]
     *
     * Based on the publication:
     * F. Petroni, L. Querzoni, K. Daudjee, S. Kamali and G. Iacoboni:
     * "HDRF: Stream-Based Partitioning for Power-Law Graphs".
     * CIKM, 2015.
     *
     */
    procid_t edge_to_proc_hdrf (const vertex_id_type source,
        const vertex_id_type target,
        bin_counts_type& src_degree,
        bin_counts_type& dst_degree,
        size_t& src_true_degree,
        size_t& dst_true_degree,
        std::vector<size_t>& proc_num_edges,
        bool usehash = false,
        bool userecent = false) {
      size_t numprocs = proc_num_edges.size();

      // Normalized (partial) degrees of the two endpoints, counting the
      // edge being placed: fu + fv == 1.
      size_t degree_u = src_true_degree;
      degree_u = degree_u +1;
      size_t degree_v = dst_true_degree;
      degree_v = degree_v +1;
      size_t SUM = degree_u + degree_v;
      double fu = degree_u;
      fu /= SUM;
      double fv = degree_v;
      fv /= SUM;

      // Compute the score of each proc.
      procid_t best_proc = -1;
      double maxscore = 0.0;
      double epsilon = 1.0;
      std::vector<double> proc_score(numprocs);
      size_t minedges = *std::min_element(proc_num_edges.begin(),
                                          proc_num_edges.end());
      size_t maxedges = *std::max_element(proc_num_edges.begin(),
                                          proc_num_edges.end());
      for (size_t i = 0; i < numprocs; ++i) {
        double new_sd = 0;
        double new_td = 0;
        size_t sd = src_degree.get(i) + (usehash && (source % numprocs == i));
        size_t td = dst_degree.get(i) + (usehash && (target % numprocs == i));
        // HDRF: a machine that already holds an endpoint is credited more
        // for the lower-degree endpoint (1 + (1 - f)).
        if (sd > 0){
          new_sd = 1+(1-fu);
        }
        if (td > 0){
          new_td = 1+(1-fv);
        }
        double bal = (maxedges - proc_num_edges[i])/(epsilon + maxedges - minedges);
        proc_score[i] = bal + new_sd + new_td;
      }
      maxscore = *std::max_element(proc_score.begin(), proc_score.end());
      std::vector<procid_t> top_procs;
      for (size_t i = 0; i < numprocs; ++i)
        if (std::fabs(proc_score[i] - maxscore) < 1e-5)
          top_procs.push_back(i);

      // Hash the edge to one of the best procs.
      typedef std::pair<vertex_id_type, vertex_id_type> edge_pair_type;
      const edge_pair_type edge_pair(std::min(source, target),
                                     std::max(source, target));
      best_proc = top_procs[graph_hash::hash_edge(edge_pair) % top_procs.size()];
      ASSERT_LT(best_proc, numprocs);

      if (userecent) {
        src_degree.clear();
        dst_degree.clear();
      }
      src_degree.set_bit(best_proc);
      dst_degree.set_bit(best_proc);
      ++proc_num_edges[best_proc];
      // Track the true (global) degree of both endpoints for later calls.
      ++src_true_degree;
      ++dst_true_degree;
      return best_proc;
    };

  };// end of ingress_edge_decision

}
#endif


================================================
FILE: src/graphlab/graph/ingress/sharding_constraint.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_DISTRIBUTED_SHARDING_CONSTRAINT_HPP
#define GRAPHLAB_DISTRIBUTED_SHARDING_CONSTRAINT_HPP
#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/util/generate_pds.hpp>
#include <algorithm>
#include <vector>
/**
 * This class defines the dependencies among the shards when using
 * a constrained partitioning algorithm.
 *
 * In constrained partitioning, vertices are assigned to a master shard
 * using hash function on the vids. Each shard S masters a partition of
 * vertices: V_s.
 *
 * Let Ai be the set of shards that Shard i depends on.
 * Then the partitioning
 * algorithm can only put edges with either ends in V_si into Ai. For example,
 * Shard i is the master of vertex u, and Shard j is the master of vertex v,
 * then edge u->v must be placed into Ai \intersect Aj.
 *
 * This class currently has two implementations of the shard constraints. One
 * construction is based on a grid, and the other is based on perfect difference set.
 * Both algorithms guarantee that Ai \intersect Aj is non-empty.
 *
 * \note: grid methods requires the number of shards to be a perfect square number. pds
 * requires the number of shards to be p^2 + p + 1 where p is a prime number.
 *
 */
namespace graphlab {
  class sharding_constraint {
    size_t nshards;
    // constraint_graph[i]: sorted list of the shards that shard i depends on (Ai).
    std::vector<std::vector<procid_t> > constraint_graph;
    // joint_nbr_cache[i][j]: precomputed Ai \intersect Aj.
    std::vector<std::vector<std::vector<procid_t> > > joint_nbr_cache;
  public:
    /// Test if the provided num_shards can be used for grid construction:
    // n == nrow*ncol && (abs(nrow-ncol) <= 2)
    static bool is_grid_compatible(size_t num_shards, int& nrow, int& ncol) {
      double approx_sqrt = sqrt(num_shards);
      nrow = floor(approx_sqrt);
      // Try near-square factorizations: ncol in [nrow, nrow+2].
      for (ncol = nrow; ncol <= nrow + 2; ++ncol) {
        if (ncol * nrow == (int)num_shards) {
          return true;
        }
      }
      return false;
    }

    /// Test if num_shards == p^2 + p + 1 for some p > 0; p is returned by reference.
    static bool is_pds_compatible(size_t num_shards, int& p) {
      p = floor(sqrt(num_shards-1));
      return (p>0 && ((p*p+p+1) == (int)num_shards));
    }

  public:
    /**
     * Build the constraint graph for `num_shards` shards with the given
     * construction ("grid" or "pds"; anything else is fatal), then
     * precompute the pairwise joint-neighbor cache.  Aborts (ASSERT_GT)
     * if any shard pair ends up with an empty intersection.
     */
    sharding_constraint(size_t num_shards, std::string method) {
      nshards = num_shards;
      // ignore the method input for now, only construct grid graph.
      // assuming nshards is perfect square
      if (method == "grid") {
        make_grid_constraint();
      } else if (method == "pds") {
        make_pds_constraint();
      } else {
        logstream(LOG_FATAL) << "Unknown sharding constraint method: "
                             << method << std::endl;
      }
      // Precompute Ai \intersect Aj for every (i, j); O(nshards^2) space.
      joint_nbr_cache.resize(num_shards);
      for (size_t i = 0; i < num_shards; ++i) {
        joint_nbr_cache[i].resize(num_shards);
        for (size_t j = 0; j < num_shards; ++j) {
          compute_neighbors(i, j, joint_nbr_cache[i][j]);
          ASSERT_GT(joint_nbr_cache[i][j].size(), 0);
        }
      }
    }

    /// Copy shard's dependency list (Ai) into `neighbors`; always returns true.
    bool get_neighbors (procid_t shard, std::vector<procid_t>& neighbors) {
      ASSERT_LT(shard, nshards);
      neighbors.clear();
      std::vector<procid_t>& ls = constraint_graph[shard];
      for (size_t i = 0; i < ls.size(); ++i)
        neighbors.push_back(ls[i]);
      return true;
    }

    /// Cached Ai \intersect Aj (valid for the lifetime of this object).
    const std::vector<procid_t>& get_joint_neighbors (procid_t shardi,
                                                      procid_t shardj) {
      return joint_nbr_cache[shardi][shardj];
    }

  private:
    // Ai = {i} + i's grid row + i's grid column, sorted ascending.
    void make_grid_constraint() {
      int ncols, nrows;
      if (!is_grid_compatible(nshards, nrows, ncols)) {
        logstream(LOG_FATAL) << "Num shards: " << nshards
                             << " cannot be used for grid ingress."
                             << std::endl;
      };
      for (size_t i = 0; i < nshards; i++) {
        std::vector<procid_t> adjlist;
        // add self
        adjlist.push_back(i);
        // add the row of i
        size_t rowbegin = (i/ncols) * ncols;
        for (size_t j = rowbegin; j < rowbegin + ncols; ++j)
          if (i != j) adjlist.push_back(j);
        // add the col of i
        for (size_t j = i % ncols; j < nshards; j+=ncols)
          if (i != j) adjlist.push_back(j);
        std::sort(adjlist.begin(), adjlist.end());
        constraint_graph.push_back(adjlist);
      }
    }

    // Ai = perfect-difference-set offsets shifted by i (mod nshards), sorted.
    void make_pds_constraint() {
      int p = 0;
      if (!is_pds_compatible(nshards, p)) {
        logstream(LOG_FATAL) << "Num shards: " << nshards
                             << " cannot be used for pdsingress."
                             << std::endl;
      };
      pds pds_generator;
      std::vector<size_t> results;
      // p == 1 (nshards == 3) is handled specially: use offsets {0, 2}.
      if (p == 1) {
        results.push_back(0);
        results.push_back(2);
      } else {
        results = pds_generator.get_pds(p);
      }
      for (size_t i = 0; i < nshards; i++) {
        std::vector<procid_t> adjlist;
        for (size_t j = 0; j < results.size(); j++) {
          adjlist.push_back( (results[j] + i) % nshards);
        }
        std::sort(adjlist.begin(), adjlist.end());
        constraint_graph.push_back(adjlist);
      }
    }

    // Intersect the (sorted) dependency lists of shardi and shardj via a
    // linear merge; result goes into `neighbors`.  Always returns true.
    bool compute_neighbors(procid_t shardi, procid_t shardj,
                           std::vector<procid_t>& neighbors) {
      ASSERT_EQ(neighbors.size(), 0);
      ASSERT_LT(shardi, nshards);
      ASSERT_LT(shardj, nshards);
      // if (shardi == shardj) {
      //   neighbors.push_back(shardi);
      //   return true;
      // }
      std::vector<procid_t>& ls1 = constraint_graph[shardi];
      std::vector<procid_t>& ls2 = constraint_graph[shardj];
      neighbors.clear();
      size_t i = 0;
      size_t j = 0;
      while (i < ls1.size() && j < ls2.size()) {
        if (ls1[i] == ls2[j]) {
          neighbors.push_back(ls1[i]);
          ++i; ++j;
        } else if (ls1[i] < ls2[j]) {
          ++i;
        } else {
          ++j;
        }
      }
      return true;
    }
  }; // end of sharding_constraint
}; // end of namespace graphlab
#endif


================================================
FILE: src/graphlab/graph/local_edge_buffer.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_LOCAL_EDGE_BUFFER #define GRAPHLAB_LOCAL_EDGE_BUFFER #include <vector> #include <graphlab/graph/graph_basic_types.hpp> namespace graphlab { template<typename VertexData, typename EdgeData> // Edge class for temporary storage. Will be finalized into the CSR+CSC form. class local_edge_buffer { public: std::vector<EdgeData> data; std::vector<lvid_type> source_arr; std::vector<lvid_type> target_arr; public: local_edge_buffer() {} void reserve_edge_space(size_t n) { data.reserve(n); source_arr.reserve(n); target_arr.reserve(n); } // \brief Add an edge to the temporary storage. void add_edge(lvid_type source, lvid_type target, EdgeData _data) { data.push_back(_data); source_arr.push_back(source); target_arr.push_back(target); } // \brief Add edges in block to the temporary storage. void add_block_edges(const std::vector<lvid_type>& src_arr, const std::vector<lvid_type>& dst_arr, const std::vector<EdgeData>& edata_arr) { data.insert(data.end(), edata_arr.begin(), edata_arr.end()); source_arr.insert(source_arr.end(), src_arr.begin(), src_arr.end()); target_arr.insert(target_arr.end(), dst_arr.begin(), dst_arr.end()); } // \brief Remove all contents in the storage. void clear() { std::vector<EdgeData>().swap(data); std::vector<lvid_type>().swap(source_arr); std::vector<lvid_type>().swap(target_arr); } // \brief Return the size of the storage. size_t size() const { return source_arr.size(); } // \brief Return the estimated memory footprint used. size_t estimate_sizeof() const { return data.capacity()*sizeof(EdgeData) + source_arr.capacity()*sizeof(lvid_type)*2 + sizeof(data) + sizeof(source_arr)*2 + sizeof(local_edge_buffer); } }; // end of class local_edge_buffer. 
} // end of namespace #endif ================================================ FILE: src/graphlab/graph/local_graph.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_LOCAL_GRAPH_HPP #define GRAPHLAB_LOCAL_GRAPH_HPP #include <cmath> #include <string> #include <list> #include <vector> #include <set> #include <map> #include <queue> #include <algorithm> #include <functional> #include <fstream> #include <boost/bind.hpp> #include <boost/unordered_set.hpp> #include <boost/type_traits.hpp> #include <boost/typeof/typeof.hpp> #include <boost/iterator/transform_iterator.hpp> #include <boost/iterator/counting_iterator.hpp> #include <boost/iterator/zip_iterator.hpp> #include <boost/range/iterator_range.hpp> #include <graphlab/graph/graph_basic_types.hpp> #include <graphlab/graph/local_edge_buffer.hpp> #include <graphlab/util/random.hpp> #include <graphlab/util/generics/shuffle.hpp> #include <graphlab/util/generics/counting_sort.hpp> #include <graphlab/util/generics/vector_zip.hpp> #include <graphlab/util/generics/csr_storage.hpp> #include <graphlab/parallel/atomic.hpp> #include <graphlab/logger/logger.hpp> #include <graphlab/logger/assertions.hpp> #include <graphlab/serialization/iarchive.hpp> #include <graphlab/serialization/oarchive.hpp> #include <graphlab/util/random.hpp> 
#include <graphlab/macros_def.hpp> namespace graphlab { template<typename VertexData, typename EdgeData> class local_graph { public: /** The type of the vertex data stored in the local_graph. */ typedef VertexData vertex_data_type; /** The type of the edge data stored in the local_graph. */ typedef EdgeData edge_data_type; typedef graphlab::vertex_id_type vertex_id_type; typedef graphlab::edge_id_type edge_id_type; class edge_type; class vertex_type; private: class edge_iterator; public: typedef boost::iterator_range<edge_iterator> edge_list_type; /** Vertex object which provides access to the vertex data * and information about it. */ class vertex_type { public: vertex_type(local_graph& lgraph_ref, lvid_type vid):lgraph_ref(lgraph_ref),vid(vid) { } /// \brief Returns a constant reference to the data on the vertex. const vertex_data_type& data() const { return lgraph_ref.vertex_data(vid); } /// \brief Returns a reference to the data on the vertex. vertex_data_type& data() { return lgraph_ref.vertex_data(vid); } /// \brief Returns the number of in edges of the vertex. size_t num_in_edges() const { return lgraph_ref.num_in_edges(vid); } /// \brief Returns the number of out edges of the vertex. size_t num_out_edges() const { return lgraph_ref.num_out_edges(vid); } /// \brief Returns the ID of the vertex. lvid_type id() const { return vid; } /// \brief Returns a list of in edges. edge_list_type in_edges() { return lgraph_ref.in_edges(vid); } /// \brief Returns a list of out edges. edge_list_type out_edges() { return lgraph_ref.out_edges(vid); } private: local_graph& lgraph_ref; lvid_type vid; }; /** Edge object which provides access to the edge data * and information about it. */ class edge_type { public: edge_type(local_graph& lgraph_ref, lvid_type _source, lvid_type _target, edge_id_type _eid) : lgraph_ref(lgraph_ref), _source(_source), _target(_target), _eid(_eid) { } /// \brief Returns a constant reference to the data on the edge. 
const edge_data_type& data() const { return lgraph_ref.edge_data(_eid); } /// \brief Returns a reference to the data on the edge. edge_data_type& data() { return lgraph_ref.edge_data(_eid); } /// \brief Returns the source vertex of the edge. vertex_type source() const { return vertex_type(lgraph_ref, _source); } /// \brief Returns the target vertex of the edge. vertex_type target() const { return vertex_type(lgraph_ref, _target); } /// \brief Returns the internal ID of this edge edge_id_type id() const { return _eid; } private: local_graph& lgraph_ref; lvid_type _source; lvid_type _target; edge_id_type _eid; }; public: // CONSTRUCTORS ============================================================> /** Create an empty local_graph. */ local_graph() : finalized(false) { } /** Create a local_graph with nverts vertices. */ local_graph(size_t nverts) : vertices(nverts), finalized(false) { } // METHODS =================================================================> static bool is_dynamic() { return false; } /** * \brief Resets the local_graph state. */ void clear() { finalized = false; vertices.clear(); edges.clear(); _csc_storage.clear(); _csr_storage.clear(); std::vector<VertexData>().swap(vertices); std::vector<EdgeData>().swap(edges); edge_buffer.clear(); } /** * \brief Finalize the local_graph data structure by * sorting edges to maximize the efficiency of graphlab. * This function takes O(|V|log(degree)) time and will * fail if there are any duplicate edges. * Detail implementation depends on the type of graph_storage. * This is also automatically invoked by the engine at start. */ void finalize() { if(finalized) return; graphlab::timer mytimer; mytimer.start(); #ifdef DEBUG_GRAPH logstream(LOG_DEBUG) << "Graph2 finalize starts." 
<< std::endl; #endif std::vector<edge_id_type> permute; std::vector<edge_id_type> src_counting_prefix_sum; std::vector<edge_id_type> dest_counting_prefix_sum; #ifdef DEBUG_GRAPH logstream(LOG_DEBUG) << "Graph2 finalize: Sort by source vertex" << std::endl; #endif // Sort edges by source; // Begin of counting sort. counting_sort(edge_buffer.source_arr, permute, &src_counting_prefix_sum); // Inplace permute of edge_data, edge_src, edge_target array. #ifdef DEBUG_GRAPH logstream(LOG_DEBUG) << "Graph2 finalize: Inplace permute by source id" << std::endl; #endif lvid_type swap_src; lvid_type swap_target; EdgeData swap_data; for (size_t i = 0; i < permute.size(); ++i) { if (i != permute[i]) { // Reserve the ith entry; size_t j = i; swap_data = edge_buffer.data[i]; swap_src = edge_buffer.source_arr[i]; swap_target = edge_buffer.target_arr[i]; // Begin swap cycle: while (j != permute[j]) { size_t next = permute[j]; if (next != i) { edge_buffer.data[j] = edge_buffer.data[next]; edge_buffer.source_arr[j] = edge_buffer.source_arr[next]; edge_buffer.target_arr[j] = edge_buffer.target_arr[next]; permute[j] = j; j = next; } else { // end of cycle edge_buffer.data[j] = swap_data; edge_buffer.source_arr[j] = swap_src; edge_buffer.target_arr[j] = swap_target; permute[j] = j; break; } } } } #ifdef DEBUG_GRAPH logstream(LOG_DEBUG) << "Graph2 finalize: Sort by dest id" << std::endl; #endif counting_sort(edge_buffer.target_arr, permute, &dest_counting_prefix_sum); // Shuffle source array #ifdef DEBUG_GRAPH logstream(LOG_DEBUG) << "Graph2 finalize: Outofplace permute by dest id" << std::endl; #endif outofplace_shuffle(edge_buffer.source_arr, permute); // Use inplace shuffle to reduce peak memory footprint: // inplace_shuffle(edge_buffer.source_arr, permute); // counting_sort(edge_buffer.target_arr, permute); // warp into csr csc storage. 
_csr_storage.wrap(src_counting_prefix_sum, edge_buffer.target_arr); std::vector<std::pair<lvid_type, edge_id_type> > csc_value = vector_zip(edge_buffer.source_arr, permute); //ASSERT_EQ(csc_value.size(), edge_buffer.size()); _csc_storage.wrap(dest_counting_prefix_sum, csc_value); edges.swap(edge_buffer.data); ASSERT_EQ(_csr_storage.num_values(), _csc_storage.num_values()); ASSERT_EQ(_csr_storage.num_values(), edges.size()); #ifdef DEBGU_GRAPH logstream(LOG_DEBUG) << "End of finalize." << std::endl; #endif logstream(LOG_INFO) << "Graph finalized in " << mytimer.current_time() << " secs" << std::endl; finalized = true; } // End of finalize /** \brief Get the number of vertices */ size_t num_vertices() const { return vertices.size(); } // end of num vertices /** \brief Get the number of edges */ size_t num_edges() const { return edges.size(); } // end of num edges /** * \brief Creates a vertex containing the vertex data and returns the id * of the new vertex id. Vertex ids are assigned in increasing order with * the first vertex having id 0. */ void add_vertex(lvid_type vid, const VertexData& vdata = VertexData() ) { if(vid >= vertices.size()) { // Enable capacity doubling if resizing beyond capacity if(vid >= vertices.capacity()) { const size_t new_size = std::max(2 * vertices.capacity(), size_t(vid)); vertices.reserve(new_size); } vertices.resize(vid+1); } vertices[vid] = vdata; } // End of add vertex; void reserve(size_t num_vertices) { ASSERT_GE(num_vertices, vertices.size()); vertices.reserve(num_vertices); } /** * \brief Add additional vertices up to provided num_vertices. This will * fail if resizing down. */ void resize(size_t num_vertices ) { ASSERT_GE(num_vertices, vertices.size()); vertices.resize(num_vertices); } // End of resize void reserve_edge_space(size_t n) { edge_buffer.reserve_edge_space(n); } /** * \brief Creates an edge connecting vertex source to vertex target. Any * existing data will be cleared. Should not be called after finalization. 
*/ edge_id_type add_edge(lvid_type source, lvid_type target, const EdgeData& edata = EdgeData()) { if (finalized) { logstream(LOG_FATAL) << "Attempting add edge to a finalized local_graph." << std::endl; ASSERT_MSG(false, "Add edge to a finalized local_graph."); } if(source == target) { logstream(LOG_FATAL) << "Attempting to add self edge (" << source << " -> " << target << "). " << "This operation is not permitted in GraphLab!" << std::endl; ASSERT_MSG(source != target, "Attempting to add self edge!"); } if(source >= vertices.size() || target >= vertices.size()) add_vertex(std::max(source, target)); // Add the edge to the set of edge data (this copies the edata) edge_buffer.add_edge(source, target, edata); // This is not the final edge_id, so we always return 0. return 0; } // End of add edge /** * \brief Add edges in block. */ void add_edges(const std::vector<lvid_type>& src_arr, const std::vector<lvid_type>& dst_arr, const std::vector<EdgeData>& edata_arr) { ASSERT_TRUE((src_arr.size() == dst_arr.size()) && (src_arr.size() == edata_arr.size())); if (finalized) { logstream(LOG_FATAL) << "Attempting add edges to a finalized local_graph." << std::endl; } for (size_t i = 0; i < src_arr.size(); ++i) { lvid_type source = src_arr[i]; lvid_type target = dst_arr[i]; if ( source >= vertices.size() || target >= vertices.size() ) { logstream(LOG_FATAL) << "Attempting add_edge (" << source << " -> " << target << ") when there are only " << vertices.size() << " vertices" << std::endl; ASSERT_MSG(source < vertices.size(), "Invalid source vertex!"); ASSERT_MSG(target < vertices.size(), "Invalid target vertex!"); } if(source == target) { logstream(LOG_FATAL) << "Attempting to add self edge (" << source << " -> " << target << "). " << "This operation is not permitted in GraphLab!" 
<< std::endl; ASSERT_MSG(source != target, "Attempting to add self edge!"); } } edge_buffer.add_block_edges(src_arr, dst_arr, edata_arr); } // End of add block edges /** \brief Returns a vertex of given ID. */ vertex_type vertex(lvid_type vid) { ASSERT_LT(vid, vertices.size()); return vertex_type(*this, vid); } /** \brief Returns a vertex of given ID. */ const vertex_type vertex(lvid_type vid) const { ASSERT_LT(vid, vertices.size()); return vertex_type(*this, vid); } /** \brief Returns a reference to the data stored on the vertex v. */ VertexData& vertex_data(lvid_type v) { ASSERT_LT(v, vertices.size()); return vertices[v]; } // end of data(v) /** \brief Returns a constant reference to the data stored on the vertex v. */ const VertexData& vertex_data(lvid_type v) const { ASSERT_LT(v, vertices.size()); return vertices[v]; } // end of data(v) /** \brief Load the local_graph from an archive */ void load(iarchive& arc) { clear(); // read the vertices arc >> vertices >> edges >> _csr_storage >> _csc_storage >> finalized; } // end of load /** \brief Save the local_graph to an archive */ void save(oarchive& arc) const { // Write the number of edges and vertices arc << vertices << edges << _csr_storage << _csc_storage << finalized; } // end of save /** swap two graphs */ void swap(local_graph& other) { finalized = other.finalized; std::swap(vertices, other.vertices); std::swap(edges, other.edges); std::swap(_csr_storage, other._csr_storage); std::swap(_csc_storage, other._csc_storage); std::swap(finalized, other.finalized); } // end of swap /** \brief Load the local_graph from a file */ void load(const std::string& filename) { std::ifstream fin(filename.c_str()); iarchive iarc(fin); iarc >> *this; fin.close(); } // end of load /** * \brief save the local_graph to the file given by the filename */ void save(const std::string& filename) const { std::ofstream fout(filename.c_str()); oarchive oarc(fout); oarc << *this; fout.close(); } // end of save /** * \brief save the 
adjacency structure to a text file. * * Save the adjacency structure as a text file in: * src_Id, dest_Id \n * src_Id, dest_Id \n * format. */ void save_adjacency(const std::string& filename) const { std::ofstream fout(filename.c_str()); ASSERT_TRUE(fout.good()); for (size_t i = 0; i < num_vertices(); ++i) { vertex_type v(i); edge_list_type ls = v.out_edges(); foreach(edge_type e, ls) { fout << (lvid_type)i << ", " << e.target().id() << "\n"; ASSERT_TRUE(fout.good()); } } fout.close(); } /**************************************************************************** * Internal Functions * * ---------------------- * * These functions functions and types provide internal access to the * * underlying local_graph representation. They should not be used unless you * * *really* know what you are doing. * ****************************************************************************/ /** * \internal * \brief Returns the number of in edges of the vertex with the given id. */ size_t num_in_edges(const lvid_type v) const { ASSERT_TRUE(finalized); return (_csc_storage.end(v) - _csc_storage.begin(v)); } /** * \internal * \brief Returns the number of in edges of the vertex with the given id. */ size_t num_out_edges(const lvid_type v) const { ASSERT_TRUE(finalized); return (_csr_storage.end(v) - _csr_storage.begin(v)); } /** * \internal * \brief Returns a list of in edges of the vertex with the given id. */ edge_list_type in_edges(lvid_type v) { edge_iterator begin = edge_iterator(*this, _csc_storage.begin(v), v); edge_iterator end = edge_iterator(*this, _csc_storage.end(v), v); return boost::make_iterator_range(begin, end); } /** * \internal * \brief Returns a list of out edges of the vertex with the given id. 
*/ edge_list_type out_edges(lvid_type v) { csr_type::iterator base_begin = _csr_storage.begin(v); csr_type::iterator base_end = _csr_storage.end(v); edge_id_type begin_eid = base_begin - _csr_storage.begin(0); edge_id_type end_eid = base_end - _csr_storage.begin(0); boost::counting_iterator<edge_id_type> counter_begin(begin_eid); boost::counting_iterator<edge_id_type> counter_end(end_eid); edge_iterator begin = edge_iterator(*this, csr_edge_iterator(csr_iterator_tuple(base_begin, counter_begin)), v); edge_iterator end = edge_iterator(*this, csr_edge_iterator(csr_iterator_tuple(base_end, counter_end)), v); return boost::make_iterator_range(begin, end); } /** * \internal * \brief Returns edge data of edge_type e * */ EdgeData& edge_data(edge_id_type eid) { ASSERT_LT(eid, num_edges()); return edges[eid]; } /** * \internal * \brief Returns const edge data of edge_type e * */ const EdgeData& edge_data(edge_id_type eid) const { ASSERT_LT(eid, num_edges()); return edges[eid]; } /** * \internal * \brief Returns the estimated memory footprint of the local_graph. 
*/ size_t estimate_sizeof() const { const size_t vlist_size = sizeof(vertices) + sizeof(VertexData) * vertices.capacity(); size_t elist_size = _csr_storage.estimate_sizeof() + _csc_storage.estimate_sizeof() + sizeof(edges) + sizeof(EdgeData)*edges.capacity(); size_t ebuffer_size = edge_buffer.estimate_sizeof(); // std::cerr << "local_graph: tmplist size: " << (double)elist_size/(1024*1024) // << " gstoreage size: " << (double)store_size/(1024*1024) // << " vdata list size: " << (double)vlist_size/(1024*1024) // << std::endl; return vlist_size + elist_size + ebuffer_size; } /** \internal * \brief For debug purpose, returns the largest vertex id in the edge_buffer */ const lvid_type maxlvid() const { if (edge_buffer.size()) { lvid_type max(0); foreach(lvid_type i, edge_buffer.source_arr) max = std::max(max, i); foreach(lvid_type i, edge_buffer.target_arr) max = std::max(max, i); return max; } else { return lvid_type(-1); } } private: /** * \internal * CSR/CSC storage types */ typedef csr_storage<lvid_type, edge_id_type> csr_type; typedef csr_storage<std::pair<lvid_type, edge_id_type>, edge_id_type> csc_type; typedef boost::tuple<csr_type::iterator, boost::counting_iterator<edge_id_type> > csr_iterator_tuple; typedef boost::zip_iterator<csr_iterator_tuple> csr_edge_iterator; typedef csc_type::iterator csc_edge_iterator; class edge_iterator : public boost::iterator_facade < edge_iterator, edge_type, boost::random_access_traversal_tag, edge_type> { public: edge_iterator(local_graph& lgraph_ref, csc_edge_iterator iter, lvid_type sourceid) : lgraph_ref(lgraph_ref), _type(CSC), csc_iter(iter), vid(sourceid) {} edge_iterator(local_graph& lgraph_ref, csr_edge_iterator iter, lvid_type destid) : lgraph_ref(lgraph_ref), _type(CSR), csr_iter(iter), vid(destid) {} private: friend class boost::iterator_core_access; void increment() { switch (_type) { case CSC: ++csc_iter; break; case CSR: ++csr_iter; break; default: return; } } bool equal(const edge_iterator& other) const { 
ASSERT_EQ(_type, other._type); switch (_type) { case CSC: return csc_iter == other.csc_iter; case CSR: return csr_iter == other.csr_iter; default: return true; } } edge_type dereference() const { return make_value(); } void decrement() { switch (_type) { case CSC: --csc_iter; break; case CSR: --csr_iter; break; default: return; } } void advance(int n) { switch (_type) { case CSC: csc_iter+=n; break; case CSR: csr_iter+=n; break; default: return; } } ptrdiff_t distance_to(const edge_iterator& other) const { switch (_type) { case CSC: return other.csc_iter - csc_iter; case CSR: return other.csr_iter - csr_iter; default: return 0; } } private: edge_type make_value() const { switch (_type) { case CSC: { typename csc_edge_iterator::reference val = *csc_iter; return edge_type(lgraph_ref, val.first, vid, val.second); } case CSR: { typename csr_edge_iterator::reference val = *csr_iter; return edge_type(lgraph_ref, vid, val.template get<0>(), val.template get<1>()); } default: return edge_type(lgraph_ref, -1, -1, -1); } } enum list_type {CSR, CSC}; local_graph& lgraph_ref; const list_type _type; csc_edge_iterator csc_iter; csr_edge_iterator csr_iter; const lvid_type vid; }; // end of edge_iterator /**************************************************************************/ /* */ /* PRIVATE DATA MEMBERS */ /* */ /**************************************************************************/ /** The vertex data is simply a vector of vertex data */ std::vector<VertexData> vertices; /** Stores the edge data and edge relationships. */ csr_type _csr_storage; csc_type _csc_storage; std::vector<EdgeData> edges; /** The edge data is a vector of edges where each edge stores its source, destination, and data. Used for temporary storage. The data is transferred into CSR+CSC representation in Finalize. This will be cleared after finalized.*/ local_edge_buffer<VertexData, EdgeData> edge_buffer; /** Mark whether the local_graph is finalized. 
Graph finalization is a costly procedure but it can also dramatically improve performance. */ bool finalized; /**************************************************************************/ /* */ /* declare friends */ /* */ /**************************************************************************/ friend class local_graph_test; }; // End of class local_graph template<typename VertexData, typename EdgeData> std::ostream& operator<<(std::ostream& out, local_graph<VertexData, EdgeData>& g) { typedef typename local_graph<VertexData, EdgeData>::edge_type edge_type; for(lvid_type vid = 0; vid < g.num_vertices(); ++vid) { foreach(const edge_type& e, g.out_edges(vid)) out << e.source().id() << ", " << e.target().id() << '\n'; } return out; } } // end of namespace graphlab namespace std { /** * Swap two graphs */ template<typename VertexData, typename EdgeData> inline void swap(graphlab::local_graph<VertexData,EdgeData>& a, graphlab::local_graph<VertexData,EdgeData>& b) { a.swap(b); } // end of swap }; // end of namespace std #include <graphlab/macros_undef.hpp> #endif ================================================ FILE: src/graphlab/graph/local_graph_ops.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * \file graph_ops.hpp * * This file supports basic graph io operations to simplify reading * and writing adjacency structures from files. * */ #ifndef GRAPHLAB_LOCAL_GRAPH_OPS_HPP #define GRAPHLAB_LOCAL_GRAPH_OPS_HPP #include <iostream> #include <fstream> #include <string> #include <boost/algorithm/string/predicate.hpp> #include <graphlab/graph/local_graph.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { namespace local_graph_ops { /** * builds a topological_sort of the graph returning it in topsort. * * \param[out] topsort Resultant topological sort of the graph vertices. * * function will return false if graph is not acyclic. */ template <typename VertexType, typename EdgeType> bool topological_sort(const graphlab::local_graph<VertexType, EdgeType>& graph, std::vector<vertex_id_type>& topsort) { typedef graphlab::local_graph<VertexType, EdgeType> graph_type; topsort.clear(); topsort.reserve(graph.num_vertices()); std::vector<size_t> indeg; indeg.resize(graph.num_vertices()); std::queue<vertex_id_type> q; for (size_t i = 0;i < graph.num_vertices(); ++i) { indeg[i] = graph.get_in_edges(i).size(); if (indeg[i] == 0) { q.push(i); } } while (!q.empty()) { vertex_id_type v = q.front(); q.pop(); topsort.push_back(v); foreach(typename graph_type::edge_type edge, graph.get_out_edges(v)) { vertex_id_type destv = edge.target(); --indeg[destv]; if (indeg[destv] == 0) { q.push(destv); } } } if (q.empty() && topsort.size() != graph.num_vertices()) { return false; } return true; } // end of topological sort template <typename VertexType, typename EdgeType> size_t num_neighbors(const graphlab::local_graph<VertexType, EdgeType>& graph, vertex_id_type& vid) { typedef graphlab::local_graph<VertexType, EdgeType> graph_type; typename graph_type::edge_list_type in_edges = graph.in_edges(vid); typename graph_type::edge_list_type out_edges = graph.out_edges(vid); typename 
graph_type::edge_list_type::const_iterator i = in_edges.begin(); typename graph_type::edge_list_type::const_iterator j = out_edges.begin(); size_t count = 0; for( ; i != in_edges.end() && j != out_edges.end(); ++count) { if(i->source() == j->target()) { ++i; ++j; } else if(i->source() < j->target()) { ++i; } else { ++j; } } for( ; i != in_edges.end(); ++i, ++count); for( ; j != out_edges.end(); ++j, ++count); return count; } // end of num_neighbors template <typename VertexType, typename EdgeType> void neighbors(const graphlab::local_graph<VertexType, EdgeType>& graph, const vertex_id_type vid, std::vector<vertex_id_type>& neighbors ) { typedef graphlab::local_graph<VertexType, EdgeType> graph_type; typename graph_type::edge_list_type in_edges = graph.in_edges(vid); typename graph_type::edge_list_type out_edges = graph.out_edges(vid); typename graph_type::edge_list_type::const_iterator i = in_edges.begin(); typename graph_type::edge_list_type::const_iterator j = out_edges.begin(); while(i != in_edges.end() && j != out_edges.end()) { if(i->source() == j->target()) { neighbors.push_back(i->source()); ++i; ++j; } else if(i->source() < j->target()) { neighbors.push_back(i->source()); ++i; } else { neighbors.push_back(j->target()); ++j; } } for( ; i != in_edges.end(); ++i) neighbors.push_back(i->source()); for( ; j != out_edges.end(); ++j) neighbors.push_back(j->target()); } // end of neighbors template <typename VertexType, typename EdgeType> bool save_metis_structure(const std::string& filename, const graphlab::local_graph<VertexType, EdgeType>& graph) { typedef graphlab::local_graph<VertexType, EdgeType> graph_type; typedef typename graph_type::edge_type edge_type; typedef typename graph_type::edge_list_type edge_list_type; std::ofstream fout(filename.c_str()); if(!fout.good()) return false; // Count the number of actual edges size_t nedges = 0; for(vertex_id_type i = 0; i < graph.num_vertices(); ++i) nedges += num_neighbors(graph, i); fout << graph.num_vertices() << 
' ' << (nedges/2) << '\n'; // Save the adjacency structure std::vector<vertex_id_type> neighbor_set; for(vertex_id_type i = 0; i < graph.num_vertices(); ++i) { neighbors(graph, i, neighbor_set); for(size_t j = 0; j < neighbor_set.size(); ++j) { fout << (neighbor_set[j] + 1); if(j + 1 < neighbor_set.size()) fout << ' '; } fout << '\n'; } fout.close(); return true; } // end of save metis template <typename VertexType, typename EdgeType> bool save_edge_list_structure(const std::string& filename, const graphlab::local_graph<VertexType, EdgeType>& graph) { typedef graphlab::local_graph<VertexType, EdgeType> graph_type; typedef typename graph_type::edge_type edge_type; typedef typename graph_type::edge_list_type edge_list_type; std::ofstream fout(filename.c_str()); if(!fout.good()) return false; for(vertex_id_type i = 0; i < graph.num_vertices(); ++i) foreach(edge_type edge, graph.out_edges(i)) fout << edge.source() << '\t' << edge.target() << '\n'; fout.close(); return true; } // end of save metis template <typename VertexType, typename EdgeType> bool save_zoltan_hypergraph_structure(const std::string& filename, const graphlab::local_graph<VertexType, EdgeType>& graph) { typedef graphlab::local_graph<VertexType, EdgeType> graph_type; typedef typename graph_type::edge_type edge_type; typedef typename graph_type::edge_list_type edge_list_type; std::ofstream fout(filename.c_str()); if(!fout.good()) return false; // ok. I need to uniquely number each edge. // how? 
boost::unordered_map<std::pair<vertex_id_type, vertex_id_type>, size_t> edgetoid; size_t curid = 0; for(vertex_id_type i = 0; i < graph.num_vertices(); ++i) { foreach(const typename graph_type::edge_type& edge, graph.in_edges(i)) { std::pair<vertex_id_type, vertex_id_type> e = std::make_pair(edge.source(), edge.target()); if (e.first > e.second) std::swap(e.first, e.second); if (edgetoid.find(e) == edgetoid.end()) { edgetoid[e] = curid; ++curid; } } foreach(const typename graph_type::edge_type& edge, graph.out_edges(i)) { std::pair<vertex_id_type, vertex_id_type> e = std::make_pair(edge.source(), edge.target()); if (e.first > e.second) std::swap(e.first, e.second); if (edgetoid.find(e) == edgetoid.end()) { edgetoid[e] = curid; ++curid; } } } size_t numedges = curid; // each edge is a vertex, each vertex is an edge // a pin is total adjacency of a hyper edge fout << numedges << "\n\n"; for (size_t i = 0;i < numedges; ++i) { fout << i+1 << "\n"; } fout << "\n"; fout << graph.num_vertices() << "\n\n"; fout << numedges * 2 << "\n\n"; // loop over the "hyperedge" and write out the edges it is adjacent to for(vertex_id_type i = 0; i < graph.num_vertices(); ++i) { boost::unordered_set<size_t> adjedges; foreach(const typename graph_type::edge_type& edge, graph.in_edges(i)) { std::pair<vertex_id_type, vertex_id_type> e = std::make_pair(edge.source(), edge.target()); if (e.first > e.second) std::swap(e.first, e.second); adjedges.insert(edgetoid[e]); } foreach(const typename graph_type::edge_type& edge, graph.out_edges(i)) { std::pair<vertex_id_type, vertex_id_type> e = std::make_pair(edge.source(), edge.target()); if (e.first > e.second) std::swap(e.first, e.second); adjedges.insert(edgetoid[e]); } // write std::vector<size_t> adjedgesvec; std::copy(adjedges.begin(), adjedges.end(), std::inserter(adjedgesvec, adjedgesvec.end())); fout << i+1 << " " << adjedgesvec.size() << "\t"; for (size_t j = 0;j < adjedgesvec.size(); ++j) { fout << adjedgesvec[j] + 1; if (j < 
adjedgesvec.size() - 1) fout << "\t"; } fout << "\n"; } fout.close(); return true; } // end of save_zoltan_hypergraph_structure }; // end of graph ops }; // end of namespace graphlab #include <graphlab/macros_undef.hpp> #endif ================================================ FILE: src/graphlab/graph/vertex_set.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_GRAPH_VERTEX_SET_HPP #define GRAPHLAB_GRAPH_VERTEX_SET_HPP #include <graphlab/util/dense_bitset.hpp> #include <graphlab/graph/graph_basic_types.hpp> #include <graphlab/rpc/buffered_exchange.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { /** * \brief Describes a set of vertices * * The vertex_set describes a set of vertices upon which * union / intersection / difference can be performed. * These sets can then be passed into graph aggregate operations * such as distributed_graph::map_reduce_vertices to perform aggregates * over \b subsets of vertices or edges. Engines also permit signalling of * sets of vertices through * \ref graphlab::iengine::signal_all(const vertex_set& vset, const message_type& message, const std::string& order) "signal_all()". 
* * \ref distributed_graph::complete_set() and \ref distributed_graph::empty_set() * provide two convenient functions to obtain a full or an empty set of * vertices. * \code * vertex_set all = graph.complete_set(); * vertex_set empty = graph.empty_set(); * \endcode * * \ref distributed_graph::select() can be used to obtain a restriction of the * set of vertices. For instance if vertices contain an integer, the following * code will construct a set of vertices containing only vertices with data * which are a multiple of 2. * * \code * bool is_multiple_of_2(const graph_type::vertex_type& vertex) { * return vertex.data() % 2 == 0; * } * vertex_set even_vertices = graph.select(is_multiple_of_2); * \endcode * For more details see \ref distributed_graph::select() * * The size of the vertex set can only be queried through the graph using * \ref distributed_graph::vertex_set_size(); * */ class vertex_set { public: /** * Used only if \ref lazy is false. * If \ref lazy is false, this must be the same size as the graph's * graphlab::distributed_graph::num_local_vertices(). * The invariant is that the bit value of each mirror vertex must be the * same value as the bit value on their corresponding master vertices. */ mutable dense_bitset localvset; /** * Used only if \ref lazy is set. * If is_complete_set is true, this set describes the set of all vertices. * If is_complete set is false, this set describes the empty set. */ bool is_complete_set; /** * If set, the localvset is empty and not used. * instead, \ref is_complete_set will define the set of vertices. */ mutable bool lazy; /** * \internal * \brief Returns a const reference to the underlying bitset. */ template <typename DGraphType> const dense_bitset& get_lvid_bitset(const DGraphType& dgraph) const { if (lazy) make_explicit(dgraph); return localvset; } /** * \internal * Sets a bit in the bitset without local threading * synchronization. vertex set must be made explicit. 
This call does not * perform remote synchronization and addititional distributed * synchronization calls must be made to restore datastructure invariants. */ inline void set_lvid_unsync(lvid_type lvid) { ASSERT_FALSE(lazy); localvset.set_bit_unsync(lvid); } /** * \internal * Sets a bit in the bitset with local threading * synchronization. vertex set must be made explicit. This call does not * perform remote synchronization and addititional distributed * synchronization calls must be made to restore datastructure invariants. */ inline void set_lvid(lvid_type lvid) { ASSERT_FALSE(lazy); localvset.set_bit(lvid); } /** * \internal * Makes the internal representation explicit by clearing the lazy flag * and filling the bitset. */ template <typename DGraphType> void make_explicit(const DGraphType& dgraph) const { if (lazy) { localvset.resize(dgraph.num_local_vertices()); if (is_complete_set) { localvset.fill(); } else { localvset.clear(); } lazy = false; } } /** * \internal * Copies the master state to each mirror. * Restores the datastructure invariants. */ template <typename DGraphType> void synchronize_master_to_mirrors(DGraphType& dgraph, buffered_exchange<vertex_id_type>& exchange) { if (lazy) { make_explicit(dgraph); return; } foreach(size_t lvid, localvset) { typename DGraphType::local_vertex_type lvtx = dgraph.l_vertex(lvid); if (lvtx.owned()) { // send to mirrors vertex_id_type gvid = lvtx.global_id(); foreach(size_t proc, lvtx.mirrors()) { exchange.send(proc, gvid); } } else { localvset.clear_bit_unsync(lvid); } } exchange.flush(); typename buffered_exchange<vertex_id_type>::buffer_type recv_buffer; procid_t sending_proc; while(exchange.recv(sending_proc, recv_buffer)) { foreach(vertex_id_type gvid, recv_buffer) { localvset.set_bit_unsync(dgraph.vertex(gvid).local_id()); } recv_buffer.clear(); } exchange.barrier(); } /** * \internal * Let the master state be the logical OR of the mirror states. 
*/ template <typename DGraphType> void synchronize_mirrors_to_master_or(DGraphType& dgraph, buffered_exchange<vertex_id_type>& exchange) { if (lazy) { make_explicit(dgraph); return; } foreach(size_t lvid, localvset) { typename DGraphType::local_vertex_type lvtx = dgraph.l_vertex(lvid); if (!lvtx.owned()) { // send to master vertex_id_type gvid = lvtx.global_id(); exchange.send(lvtx.owner(), gvid); } } exchange.flush(); typename buffered_exchange<vertex_id_type>::buffer_type recv_buffer; procid_t sending_proc; while(exchange.recv(sending_proc, recv_buffer)) { foreach(vertex_id_type gvid, recv_buffer) { localvset.set_bit_unsync(dgraph.vertex(gvid).local_id()); } recv_buffer.clear(); } exchange.barrier(); } template <typename VertexType, typename EdgeType> friend class distributed_graph; public: /// default constructor which constructs an empty set. vertex_set():is_complete_set(false), lazy(true){} /** Constructs a completely empty, or a completely full vertex set * \param complete If set to true, creates a set of all vertices. * If set to false, creates an empty set. */ explicit vertex_set(bool complete):is_complete_set(complete),lazy(true){} /// copy constructor inline vertex_set(const vertex_set& other): localvset(other.localvset), is_complete_set(other.is_complete_set), lazy(other.lazy) {} /// copyable inline vertex_set& operator=(const vertex_set& other) { localvset = other.localvset; is_complete_set = other.is_complete_set; lazy = other.lazy; return *this; } /** * \internal * Queries if a local vertex ID is contained within the vertex set */ inline bool l_contains(lvid_type lvid) const { if (lazy) return is_complete_set; if (lvid < localvset.size()) { return localvset.get(lvid); } else { return false; } } /** * \brief Takes the set intersection of two vertex sets. * * \code * vertex_set intersection_result = a & b; * \endcode * A vertex is in \c intersection_result if and only if the vertex is in * \b both set \c a and set \c b. 
 */
inline vertex_set operator&(const vertex_set& other) const {
  vertex_set ret = (*this);
  ret &= other;
  return ret;
}

/**
 * \brief Takes the set union of two vertex sets.
 *
 * \code
 * vertex_set union_result = a | b;
 * \endcode
 * A vertex is in \c union_result if and only if the vertex is in
 * \b either of set \c a and set \c b.
 *
 */
inline vertex_set operator|(const vertex_set& other) const {
  vertex_set ret = (*this);
  ret |= other;
  return ret;
}

/**
 * \brief Takes the set difference of two vertex sets.
 *
 * \code
 * vertex_set difference_result = a - b;
 * \endcode
 * A vertex is in \c difference_result if and only if the vertex is in
 * set a and not in set b.
 *
 * Equivalent to:
 *
 * \code
 * vertex_set inv_b = ~b;
 * vertex_set difference_result = a & inv_b;
 * \endcode
 */
inline vertex_set operator-(const vertex_set& other) const {
  vertex_set ret = (*this);
  ret -= other;
  return ret;
}

/**
 * \brief Takes the set intersection of the current vertex set with another
 * vertex set.
 *
 * \code
 * a &= b;
 * \endcode
 * A vertex is in the resultant \c a if and only if the vertex was in
 * \b both set \c a and set \c b.
 */
inline vertex_set& operator&=(const vertex_set& other) {
  // Lazy sets are handled symbolically: "all" is the identity for
  // intersection, "none" annihilates it. Only two explicit sets need a
  // bitwise AND.
  if (lazy) {
    if (is_complete_set) (*this) = other;
    else (*this) = vertex_set(false);
  } else if (other.lazy) {
    if (other.is_complete_set) /* no op */;
    else (*this) = vertex_set(false);
  } else {
    localvset &= other.localvset;
  }
  return *this;
}

/**
 * \brief Takes the set union of the current vertex set with another
 * vertex set.
 *
 * \code
 * a |= b;
 * \endcode
 * A vertex is in the resultant \c a if and only if the vertex was in
 * \b either set \c a and set \c b.
 */
inline vertex_set& operator|=(const vertex_set& other) {
  // Lazy sets are handled symbolically: "all" annihilates union, "none"
  // is its identity. Only two explicit sets need a bitwise OR.
  if (lazy) {
    if (is_complete_set) (*this) = vertex_set(true);
    else (*this) = other;
  } else if (other.lazy) {
    if (other.is_complete_set) (*this) = vertex_set(true);
    else /* no op */;
  } else {
    localvset |= other.localvset;
  }
  return *this;
}

/**
 * \brief Takes the set difference of the current vertex set with another
 * vertex set.
 *
 * \code
 * a -= b;
 * \endcode
 * A vertex is in the resultant \c a if and only if the vertex was in
 * set \c a but not in set \c b.
 *
 * Conceptually equivalent to
 * \code
 * a &= ~b;
 * \endcode
 */
inline vertex_set& operator-=(const vertex_set& other) {
  // Symbolic handling of lazy operands; note "all - b" is implemented
  // as the inverse of b.
  if (lazy) {
    if (is_complete_set) (*this) = ~other;
    else (*this) = vertex_set(false);
  } else if (other.lazy) {
    if (other.is_complete_set) (*this) = vertex_set(false);
    else /* no op */;
  } else {
    localvset -= other.localvset;
  }
  return *this;
}

/**
 * \brief Returns the inverse of the current set.
 *
 * \code
 * vertex_set inv_b = ~b;
 * \endcode
 * A vertex is in \c inv_b if and only if it is not in \c b
 */
inline vertex_set operator~() const {
  vertex_set ret(*this);
  ret.invert();
  return ret;
}

/**
 * \brief Inverts the current set in-place.
 *
 * \code
 * b.invert();
 * \endcode
 * A vertex is in the result \c b if and only if it is not in \c b
 */
inline void invert() {
  // A lazy set flips between "all" and "none"; an explicit set flips bits.
  if (lazy) {
    is_complete_set = !is_complete_set;
  } else {
    localvset.invert();
  }
}

};

} // namespace graphlab

#include <graphlab/macros_undef.hpp>
#endif

================================================
FILE: src/graphlab/jni/CMakeLists.txt
================================================
project(GraphLab)

# NOTE: do not link tcmalloc! Does not like Java.
# Build the JNI bridge library from the proxy/wrapper sources.
add_jni_library(graphlabjni
  org_graphlab_Updater.cpp
  org_graphlab_Aggregator.cpp
  org_graphlab_Context.cpp
  org_graphlab_Core.cpp
  java_any.cpp)

if(JNI_REALLY_FOUND)

  # copy to ${SRC}/extapis/java_jni/lib when done
  # NOTE(review): the LOCATION target property is deprecated in modern
  # CMake; $<TARGET_FILE:graphlabjni> would be the current idiom — confirm
  # minimum CMake version before changing.
  get_target_property(graphlabjni_location graphlabjni LOCATION)
  add_custom_command(
    TARGET graphlabjni
    POST_BUILD
    COMMAND cp ${graphlabjni_location} ${CMAKE_SOURCE_DIR}/extapis/java_jni/lib/
    COMMENT "Copying graphlabjni to extapis/java_jni/lib")

  # if ant is found, build Java classes
  if(ANT_FOUND)
    add_custom_command(
      TARGET graphlabjni
      POST_BUILD
      COMMAND cd ${CMAKE_SOURCE_DIR}/extapis/java_jni && ant
      COMMENT "Building Java classes using Ant.")
  else()
    message(STATUS "Ant not detected. Java classes will not be built.")
  endif()

endif()

================================================
FILE: src/graphlab/jni/java_any.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

/**
 * @file java_any.cpp
 * @author Jiunn Haur Lim <jiunnhal@cmu.edu>
 */

#include <graphlab.hpp>
#include "java_any.hpp"
#include "org_graphlab_Core.hpp"
#include "org_graphlab_Updater.hpp"

using namespace graphlab;

//---------------------------------------------------------------
// java_any instance members
//---------------------------------------------------------------

// Wraps a Java object, pinning it with a JNI global reference.
java_any::java_any(JNIEnv *env, jobject &obj){
  // create a new ref so that it doesn't get garbage collected
  mobj = env->NewGlobalRef(obj);
}

java_any::java_any() : mobj(NULL){}

// Accessor for the wrapped jobject (may be NULL).
jobject &java_any::obj() {
  return mobj;
}

const jobject &java_any::obj() const {
  return mobj;
}

// Copy constructor: shares the underlying Java object by taking a fresh
// global reference to it (no clone of the Java-side state).
java_any::java_any(const java_any& other){
  // other doesn't have an existing ref
  if (NULL == other.mobj){
    this->mobj = NULL;
    return;
  }
  // create a new ref
  JNIEnv *env = proxy_updater::core::get_jni_env();
  this->mobj = env->NewGlobalRef(other.mobj);
}

// Assignment: take the new ref before dropping the old one so the state
// stays valid even if NewGlobalRef fails.
java_any &java_any::operator=(const java_any& other){

  // prevent self assignment
  if (this == &other) return *this;

  JNIEnv *env = proxy_updater::core::get_jni_env();
  jobject obj = NULL;

  // if other has a java object, create a new ref
  if (NULL != other.mobj){
    obj = env->NewGlobalRef(other.mobj);
  }

  // if this has a java object, delete ref
  if (NULL != this->mobj){
    env->DeleteGlobalRef(this->mobj);
  }

  // assign!
  this->mobj = obj;
  return *this;

}

// Replaces the wrapped object: drops the current global ref (if any) and
// takes a new one when obj is non-NULL.
void java_any::set_obj(jobject obj){
  JNIEnv *env = proxy_updater::core::get_jni_env();
  if (NULL != mobj){
    // delete current ref
    env->DeleteGlobalRef(mobj);
    mobj = NULL;
  }
  if (NULL != obj){
    mobj = env->NewGlobalRef(obj);
  }
}

java_any::~java_any(){
  if (NULL == mobj) return;
  // delete reference to allow garbage collection
  JNIEnv *env = proxy_updater::core::get_jni_env();
  env->DeleteGlobalRef(mobj);
  mobj = NULL;
}

// Checks for a pending Java exception; if one exists, logs and clears it,
// then rethrows it Java-side as an IllegalArgumentException.
bool java_any::handle_exception(JNIEnv *env) const {

  // check for exception
  jthrowable exc = env->ExceptionOccurred();
  if (!exc) return false;

  env->ExceptionDescribe();
  env->ExceptionClear();

  proxy_updater::core::throw_exception(
    env,
    "java/lang/IllegalArgumentException",
    "thrown from C code.");

  return true;

}

================================================
FILE: src/graphlab/jni/java_any.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

/**
 * @file java_any.hpp
 * @author Jiunn Haur Lim <jiunnhal@cmu.edu>
 */

#ifndef JAVA_ANY_HPP
#define JAVA_ANY_HPP

#include <jni.h>

namespace graphlab {

  /**
   * Generic wrapper for Java objects (jobject). It creates a NewGlobalRef on
   * the jobject in the default constructor, and deletes the GlobalRef in the
   * destructor. An assignment operator is also provided to deal with creating
   * and deleting references.
Subclasses should provide a copy constructor, and there
   * are two scenarios: NewGlobalRef during copy, or object clone during copy.
   */
  class java_any {

  private:
    /** Java object (JNI global reference, NULL when empty) */
    jobject mobj;

  public:

    /**
     * Constructor for java_any.
     * Initializes this object with the associated Java object.
     * @param[in] env           JNI environment - used to create a new reference
     *                          to obj
     * @param[in] obj           Java object. This constructor will create a new
     *                          reference to the object to prevent garbage
     *                          collection.
     */
    java_any(JNIEnv *env, jobject &obj);

    /** The default constructor does nothing. mobj is initialized to NULL. */
    java_any();

    /**
     * Copy constructor
     * If `other` has a reference to a java object, increases the reference count.
     * Child classes may wish to override this to implement clone behavior.
     */
    java_any(const java_any& other);

    /**
     * Copy assignment operator for java_any.
     * If \c other has a \c mobj, creates a new reference to it.
     */
    java_any &operator=(const java_any &other);

    /**
     * Retrieves the associated Java object
     */
    jobject &obj();
    const jobject &obj() const;

    /**
     * Deletes the reference to the Java object so that it may be garbage
     * collected.
     */
    ~java_any();

  protected:

    /**
     * Deletes the current ref (if any) and creates a new ref if `obj` is not null.
     * @param[in] obj   replaces current object ref
     */
    void set_obj(jobject obj);

    /**
     * Checks for and rethrows Java exceptions.
     * @param[in] env   JNI environment
     * @return true if exception was found; false otherwise
     */
    bool handle_exception(JNIEnv *env) const;

  };

};

#endif

================================================
FILE: src/graphlab/jni/org_graphlab_Aggregator.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#include "org_graphlab_Aggregator.hpp"

using namespace graphlab;

//---------------------------------------------------------------
// proxy_aggregator static members
//---------------------------------------------------------------

// Cached JNI method IDs; resolved once in initNative below.
jmethodID proxy_aggregator::java_exec = 0;
jmethodID proxy_aggregator::java_add = 0;
jmethodID proxy_aggregator::java_finalize = 0;
jmethodID proxy_aggregator::java_clone = 0;

// Resolves and caches the method IDs of the org.graphlab.Aggregator
// callbacks. Called once from the Java side during class initialization.
JNIEXPORT void JNICALL
Java_org_graphlab_Aggregator_initNative
(JNIEnv *env, jclass clazz){

  // Aggregator#exec
  proxy_aggregator::java_exec =
    env->GetMethodID(clazz, "exec", "(JLorg/graphlab/data/Vertex;)V");

  // Aggregator#add
  proxy_aggregator::java_add =
    env->GetMethodID(clazz, "add", "(Lorg/graphlab/Aggregator;)V");

  // Aggregator#finalize
  proxy_aggregator::java_finalize =
    env->GetMethodID(clazz, "finalize", "(J)V");

  // Aggregator#clone
  proxy_aggregator::java_clone =
    env->GetMethodID(clazz, "clone", "()Lorg/graphlab/Aggregator;");

}

//---------------------------------------------------------------
// proxy_aggregator instance members
//---------------------------------------------------------------

proxy_aggregator::
proxy_aggregator(JNIEnv *env, jobject &obj)
  : java_any(env, obj){}

proxy_aggregator::proxy_aggregator() : java_any(){}

// Copy constructor: unlike java_any's copy (which shares via a new global
// ref), this CLONES the Java aggregator so each copy accumulates
// independently.
// NOTE(review): the CallObjectMethod result is a local ref and no
// exception check follows the call — confirm a pending Java exception
// here cannot leave the proxy holding a ref to a bogus object.
proxy_aggregator::
proxy_aggregator(const proxy_aggregator& other) {

  // other doesn't have an existing ref
  if (NULL == other.obj()){
    set_obj(NULL);
    return;
  }

  // clone the java object
  JNIEnv *env = core::get_jni_env();
  set_obj(env->CallObjectMethod(other.obj(), java_clone));

}

proxy_aggregator &proxy_aggregator::operator=(const proxy_aggregator& other){
  if (this == &other) return *this;
  // Delegates to java_any::operator= (shares via a new global ref).
  java_any::operator=(other);
  return *this;
}

proxy_aggregator::~proxy_aggregator(){}

//---------------------------------------------------------------
// proxy_aggregator instance members - the update function
//---------------------------------------------------------------

void proxy_aggregator::operator()(icontext_type& context){
  // forward call to org.graphlab.Aggregator#exec; the context pointer is
  // passed to Java as the jlong first argument.
  JNIEnv *env = core::get_jni_env();
  env->CallVoidMethod (obj(), java_exec,
                       &context,
                       context.vertex_data().obj());
  handle_exception(env);
}

//---------------------------------------------------------------
// proxy_aggregator instance members - the add function
//---------------------------------------------------------------

void proxy_aggregator::operator+=(const proxy_aggregator& other) {
  // forward call to org.graphlab.Aggregator#add
  JNIEnv *env = core::get_jni_env();
  env->CallVoidMethod (obj(), java_add, other.obj());
  handle_exception(env);
}

//---------------------------------------------------------------
// proxy_aggregator instance members - the finalize function
//---------------------------------------------------------------

void proxy_aggregator::finalize(iglobal_context& context){
  // forward call to org.graphlab.Aggregator#finalize
  JNIEnv *env = core::get_jni_env();
  env->CallVoidMethod (obj(), java_finalize, &context);
  handle_exception(env);
}

================================================
FILE: src/graphlab/jni/org_graphlab_Aggregator.h
================================================
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class org_graphlab_Aggregator */

#ifndef _Included_org_graphlab_Aggregator
#define _Included_org_graphlab_Aggregator
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Class:     org_graphlab_Aggregator
 * Method:    initNative
 * Signature: ()V
 */
JNIEXPORT
void JNICALL Java_org_graphlab_Aggregator_initNative
  (JNIEnv *, jclass);

#ifdef __cplusplus
}
#endif
#endif

================================================
FILE: src/graphlab/jni/org_graphlab_Aggregator.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

/**
 * @file org_graphlab_Aggregator.hpp
 * @author Jiunn Haur Lim <jiunnhal@cmu.edu>
 */

#ifndef ORG_GRAPHLAB_AGGREGATOR_HPP
#define ORG_GRAPHLAB_AGGREGATOR_HPP

#include <graphlab.hpp>
#include "java_any.hpp"
#include "org_graphlab_Core.hpp"
#include "org_graphlab_Updater.hpp"
#include "org_graphlab_Aggregator.h"

namespace graphlab {

  /**
   * Proxy aggregator.
   * Mirrors and forwards calls to the corresponding Java aggregator.
   * The constructor creates a new reference to the Java object (so that it
   * doesn't get garbage collected.) The destructor will delete the reference
   * to allow the corresponding Java object to be garbaged collected. The copy
   * constructor clones the Java object.
   */
  class proxy_aggregator :
    public iaggregator<proxy_graph, proxy_updater, proxy_aggregator>,
    public java_any {

  private:
    // shorthand for the jni_core specialization used throughout
    typedef proxy_updater::core core;

  public:

    /**
     * Method ID of org.graphlab.Aggregator#exec.
     */
    static jmethodID java_exec;

    /**
     * Method ID of org.graphlab.Aggregator#add.
     */
    static jmethodID java_add;

    /**
     * Method ID of org.graphlab.Aggregator#finalize.
     */
    static jmethodID java_finalize;

    /**
     * Method ID of org.graphlab.Aggregator#clone
     */
    static jmethodID java_clone;

    /**
     * Constructor for proxy aggregator.
     * Initializes this object with the associated Java org.graphlab.Updater
     * object.
     * @param[in] env               JNI environment - used to create a new reference
     *                              to javaUpdater.
     * @param[in] java_aggregator   Java org.graphlab.Aggregator object. This constructor
     *                              will create a new reference to the object to prevent
     *                              garbage collection.
     */
    proxy_aggregator(JNIEnv *env, jobject &java_aggregator);

    /** The default constructor does nothing */
    proxy_aggregator();

    /**
     * Copy constructor for proxy_aggregator.
     * If \c other has a \c mobj, creates a new reference to it.
     */
    proxy_aggregator(const proxy_aggregator& other);

    /**
     * Copy assignment operator for proxy_aggregator.
     * If \c other has a \c mobj, creates a new reference to it.
     */
    proxy_aggregator &operator=(const proxy_aggregator &other);

    /**
     * Deletes the reference to the Java object so that it may be garbage
     * collected.
     */
    ~proxy_aggregator();

    // iaggregator interface: forwarded to the Java aggregator callbacks.
    void operator()(icontext_type& context);

    void operator+=(const proxy_aggregator& other);

    void finalize(iglobal_context& context);

  };

};

#endif

================================================
FILE: src/graphlab/jni/org_graphlab_Context.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

/**
 * @file org_graphlab_Context.cpp
 * Implementations of native methods in \c org.graphlab.Context. Refer to
 * corresponding Javadoc.
 * @author Jiunn Haur Lim <jiunnhal@cmu.edu>
 */

#include "org_graphlab_Context.hpp"
#include "org_graphlab_Updater.hpp"

using namespace graphlab;

#ifdef __cplusplus
extern "C" {
#endif

// Schedules a vertex update from within a Java updater: the jlong handle
// is the address of the engine context owning the current update.
JNIEXPORT void JNICALL
Java_org_graphlab_Context_schedule
( JNIEnv *env, jobject obj,
  jlong context_ptr, jobject updater, jint vertex_id){

  // convert longs to pointers
  proxy_updater::context *context =
    (proxy_updater::context *) context_ptr;

  context->schedule(vertex_id, proxy_updater(env, updater));

}

#ifdef __cplusplus
}
#endif

================================================
FILE: src/graphlab/jni/org_graphlab_Context.h
================================================
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class org_graphlab_Context */

#ifndef _Included_org_graphlab_Context
#define _Included_org_graphlab_Context
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Class:     org_graphlab_Context
 * Method:    schedule
 * Signature: (JLorg/graphlab/Updater;I)V
 */
JNIEXPORT void JNICALL Java_org_graphlab_Context_schedule
  (JNIEnv *, jobject, jlong, jobject, jint);

#ifdef __cplusplus
}
#endif
#endif

================================================
FILE: src/graphlab/jni/org_graphlab_Context.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * @file org_graphlab_Context.hpp * \c javah will generate \c org_graphlab_Context.h from the native methods * defined in \c org.graphlab.Context (and so will overwrite the file every time). * Define any additional classes/structs/typedefs in this hpp file. * @author Jiunn Haur Lim <jiunnhal@cmu.edu> */ #ifndef ORG_GRAPHLAB_CONTEXT_HPP #define ORG_GRAPHLAB_CONTEXT_HPP #include <graphlab.hpp> #include "org_graphlab_Context.h" // define any additional classes/structs/typedefs here #endif ================================================ FILE: src/graphlab/jni/org_graphlab_Core.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * @file org_graphlab_Core.cpp * * Contains the JNI interface for org.graphlab.Core. 
In general, applications * will keep their graphs in the Java layer and access the engine through the * JNI. This wrapper provides a proxy graph for the engine to manipulate and * forwards update calls to the Java layer. To learn how to use this interface, * refer to the org.graphlab.Core class and to the examples. * * @author Jiunn Haur Lim <jiunnhal@cmu.edu> */ #include <wordexp.h> #include "org_graphlab_Core.hpp" #include "org_graphlab_Updater.hpp" #include "org_graphlab_Aggregator.hpp" using namespace graphlab; //--------------------------------------------------------------- // jni_core static members //--------------------------------------------------------------- template<typename G, typename U> JavaVM* jni_core<G, U>::mjvm = NULL; template<typename G, typename U> const size_t jni_core<G, U>::ENV_ID = 1; #ifdef __cplusplus extern "C" { #endif //--------------------------------------------------------------- // static helper functions //--------------------------------------------------------------- static jlong createCore (JNIEnv *env, jobject obj, int argc, char **argv){ // configure log level global_logger().set_log_level(LOG_DEBUG); global_logger().set_log_to_console(false); // set jvm, if we don't have it already if (NULL == proxy_updater::core::get_jvm()){ JavaVM* jvm = NULL; env->GetJavaVM(&jvm); proxy_updater::core::set_jvm(jvm); } // store env for this thread thread::get_local(proxy_updater::core::ENV_ID) = env; // allocate and configure core proxy_updater::core *jni_core = new proxy_updater::core(env, obj); if (NULL != argv){ (*jni_core)().parse_options(argc, argv); } // return address of jni_core return (long) jni_core; } //--------------------------------------------------------------- // JNI functions //--------------------------------------------------------------- JNIEXPORT jlong JNICALL Java_org_graphlab_Core_createCore__ (JNIEnv *env, jobject obj){ return createCore(env, obj, 0, NULL); } JNIEXPORT jlong JNICALL 
Java_org_graphlab_Core_createCore__Ljava_lang_String_2
(JNIEnv *env, jobject obj, jstring command_line_args){

  // convert jstring to c string
  const char *cstr = NULL;
  cstr = env->GetStringUTFChars(command_line_args, NULL);
  if (NULL == cstr) {
    return 0; /* OutOfMemoryError already thrown */
  }

  // prepend with dummy name
  // (argv[0] convention: option parsers skip the program name)
  char buffer[1024];
  snprintf(buffer, 1024, "x %s", cstr);
  env->ReleaseStringUTFChars(command_line_args, cstr);

  // split string into shell-style words
  wordexp_t we_result;
  if (0 != wordexp(buffer, &we_result, 0)) return 0;

  // create core
  jlong ptr = createCore(env, obj, we_result.we_wordc, we_result.we_wordv);

  // cleanup
  wordfree(&we_result);

  return ptr;

}

// Frees the core previously created by one of the createCore variants.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_destroyCore
(JNIEnv *env, jobject obj, jlong ptr){

  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException", "ptr must not be null.");
    return;
  }

  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;

  // cleanup core
  delete jni_core;

}

// Pre-sizes the proxy graph to hold `count` vertices.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_resizeGraph
(JNIEnv *env, jobject obj, jlong ptr, jint count){

  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException", "ptr must not be null.");
    return;
  }

  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;
  (*jni_core)().graph().resize(count);

}

// Adds a vertex backed by the given Java object to the proxy graph.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_addVertex
(JNIEnv *env, jobject obj, jlong ptr, jobject app_vertex, jint vertex_id){

  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException", "ptr must not be null.");
    return;
  }

  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;

  // add to graph
  (*jni_core)().graph()
    .add_vertex(vertex_id, proxy_vertex(env, app_vertex));

}

// Adds a directed edge (with a Java-side payload) to the proxy graph.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_addEdge
(JNIEnv *env, jobject obj, jlong ptr, jint source, jint target, jobject app_edge){

  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env,
      "java/lang/IllegalArgumentException", "ptr must not be null.");
    return;
  }

  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;

  // add to graph
  (*jni_core)().graph().add_edge(source, target, proxy_edge(env, app_edge));

}

// Runs the engine to completion; returns the elapsed runtime in seconds.
JNIEXPORT jdouble JNICALL
Java_org_graphlab_Core_start
(JNIEnv *env, jobject obj, jlong ptr){

  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException", "ptr must not be null.");
    return 0;
  }

  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;

  (*jni_core)().engine().get_options().print();
  double runtime = (*jni_core)().start();

  return runtime;

}

// Returns the number of updates executed in the last engine run.
JNIEXPORT jlong JNICALL
Java_org_graphlab_Core_lastUpdateCount
(JNIEnv *env, jobject obj, jlong ptr){

  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException", "ptr must not be null.");
    return 0;
  }

  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;
  return (*jni_core)().engine().last_update_count();

}

// Registers an immutable shared (global constant) value under `key`.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_addGlobalConst
(JNIEnv *env, jobject obj, jlong ptr, jstring key, jobject to_store){

  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException", "ptr must not be null.");
    return;
  }

  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;

  // convert jstring to c string
  const char * key_str = env->GetStringUTFChars(key, NULL);
  (*jni_core)().add_global_const(std::string(key_str), java_any(env, to_store));

  // free memory
  env->ReleaseStringUTFChars(key, key_str);

  return;

}

// Registers a mutable shared (global) value under `key`.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_addGlobal
(JNIEnv *env, jobject obj, jlong ptr, jstring key, jobject to_store){

  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException", "ptr must not be null.");
    return;
  }

  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;

  // convert jstring to c string
  const char * key_str = env->GetStringUTFChars(key, NULL);
  java_any a = java_any(env, to_store);
  (*jni_core)().add_global(std::string(key_str), a);

  // free memory
  env->ReleaseStringUTFChars(key, key_str);

  return;

}

// Replaces the shared value previously registered under `key`.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_setGlobal
(JNIEnv *env, jobject obj, jlong ptr, jstring key, jobject to_store){

  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException", "ptr must not be null.");
    return;
  }

  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;

  // convert jstring to c string
  const char * key_str = env->GetStringUTFChars(key, NULL);
  java_any a = java_any(env, to_store);
  (*jni_core)().set_global(std::string(key_str), a);

  // free memory
  env->ReleaseStringUTFChars(key, key_str);

  return;

}

// Retrieves the shared value under `key`; returns a local ref for Java.
JNIEXPORT jobject JNICALL
Java_org_graphlab_Core_getGlobal
(JNIEnv *env, jobject obj, jlong ptr, jstring key){

  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException", "ptr must not be null.");
    return NULL;
  }

  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;

  // convert jstring to c string
  const char * key_str = env->GetStringUTFChars(key, NULL);
  java_any stored = (*jni_core)().get_global<java_any>(std::string(key_str));

  // free memory
  env->ReleaseStringUTFChars(key, key_str);

  return env->NewLocalRef(stored.obj());

}

// Sets the number of worker CPUs used by the engine.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_setNCpus
(JNIEnv * env, jobject obj, jlong ptr, jlong ncpus) {
  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException", "ptr must not be null.");
    return;
  }
  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;
  (*jni_core)().set_ncpus(ncpus);
}

// Selects the scheduler by name (e.g. as listed by the engine options).
JNIEXPORT void JNICALL
Java_org_graphlab_Core_setSchedulerType
(JNIEnv * env, jobject obj, jlong ptr, jstring scheduler_str) {
  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException", "ptr must not be null.");
    return;
  }
  const char *str =
    env->GetStringUTFChars(scheduler_str, NULL);
  if (NULL == str) return; // OutOfMemoryError already thrown
  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;
  (*jni_core)().set_scheduler_type(std::string(str));
  env->ReleaseStringUTFChars(scheduler_str, str);
}

// Selects the consistency scope by name.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_setScopeType
(JNIEnv * env, jobject obj, jlong ptr, jstring scope_str) {
  if (NULL == env || 0 == ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException", "ptr must not be null.");
    return;
  }
  const char *str = env->GetStringUTFChars(scope_str, NULL);
  if (NULL == str) return; // OutOfMemoryError already thrown
  proxy_updater::core *jni_core = (proxy_updater::core *) ptr;
  (*jni_core)().set_scope_type(std::string(str));
  env->ReleaseStringUTFChars(scope_str, str);
}

// Schedules the given updater on a single vertex.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_schedule
(JNIEnv * env, jobject obj,
 jlong core_ptr, jobject updater, jint vertex_id){

  if (NULL == env || 0 == core_ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException",
      "core_ptr must not be null.");
    return;
  }

  // get objects from pointers
  proxy_updater::core *jni_core = (proxy_updater::core *) core_ptr;

  // schedule vertex
  (*jni_core)().schedule(vertex_id, proxy_updater(env, updater));

}

// Schedules the given updater on every vertex in the graph.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_scheduleAll
(JNIEnv * env, jobject obj,
 jlong core_ptr, jobject updater) {

  if (NULL == env || 0 == core_ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException",
      "core_ptr and updater_ptr must not be null.");
    return;
  }

  // get objects from pointers
  proxy_updater::core *jni_core = (proxy_updater::core *) core_ptr;

  // schedule vertex
  (*jni_core)().schedule_all(proxy_updater(env, updater));

}

// Registers a Java aggregator to run every `frequency` updates.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_addAggregator
(JNIEnv * env, jobject obj,
 jlong core_ptr, jstring key, jobject aggregator, jlong frequency){

  if (NULL == env || 0 == core_ptr){
    proxy_updater::core::throw_exception(
      env,
      "java/lang/IllegalArgumentException",
      "core_ptr and updater_ptr must not be null.");
    return;
  }

  // get objects from pointers
  proxy_updater::core *jni_core = (proxy_updater::core *) core_ptr;

  // add aggregator
  const char * key_str = env->GetStringUTFChars(key, NULL);
  (*jni_core)().add_aggregator(std::string(key_str),
                               proxy_aggregator(env, aggregator),
                               frequency);
  env->ReleaseStringUTFChars(key, key_str);

}

// Runs the aggregator registered under `key` immediately.
JNIEXPORT void JNICALL
Java_org_graphlab_Core_aggregateNow
(JNIEnv * env, jobject obj,
 jlong core_ptr, jstring key){

  if (NULL == env || 0 == core_ptr){
    proxy_updater::core::throw_exception(
      env, "java/lang/IllegalArgumentException",
      "core_ptr and updater_ptr must not be null.");
    return;
  }

  // get objects from pointers
  proxy_updater::core *jni_core = (proxy_updater::core *) core_ptr;

  // add aggregator
  const char * key_str = env->GetStringUTFChars(key, NULL);
  (*jni_core)().aggregate_now(std::string(key_str));
  env->ReleaseStringUTFChars(key, key_str);

}

#ifdef __cplusplus
}
#endif

================================================
FILE: src/graphlab/jni/org_graphlab_Core.h
================================================
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class org_graphlab_Core */

#ifndef _Included_org_graphlab_Core
#define _Included_org_graphlab_Core
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Class:     org_graphlab_Core
 * Method:    createCore
 * Signature: ()J
 */
JNIEXPORT jlong JNICALL Java_org_graphlab_Core_createCore__
  (JNIEnv *, jobject);
/*
 * Class:     org_graphlab_Core
 * Method:    createCore
 * Signature: (Ljava/lang/String;)J
 */
JNIEXPORT jlong JNICALL Java_org_graphlab_Core_createCore__Ljava_lang_String_2
  (JNIEnv *, jobject, jstring);
/*
 * Class:     org_graphlab_Core
 * Method:    destroyCore
 * Signature: (J)V
 */
JNIEXPORT void JNICALL Java_org_graphlab_Core_destroyCore
  (JNIEnv *, jobject, jlong);
/*
 * Class:     org_graphlab_Core
 * Method:    resizeGraph
 * Signature: (JI)V
 */
JNIEXPORT void JNICALL Java_org_graphlab_Core_resizeGraph
(JNIEnv *, jobject, jlong, jint); /* * Class: org_graphlab_Core * Method: addVertex * Signature: (JLorg/graphlab/data/Vertex;)I */ JNIEXPORT void JNICALL Java_org_graphlab_Core_addVertex (JNIEnv *, jobject, jlong, jobject, jint); /* * Class: org_graphlab_Core * Method: addEdge * Signature: (JII)V */ JNIEXPORT void JNICALL Java_org_graphlab_Core_addEdge (JNIEnv *, jobject, jlong, jint, jint, jobject); /* * Class: org_graphlab_Core * Method: schedule * Signature: (JLorg/graphlab/Updater;I)V */ JNIEXPORT void JNICALL Java_org_graphlab_Core_schedule (JNIEnv *, jobject, jlong, jobject, jint); /* * Class: org_graphlab_Core * Method: scheduleAll * Signature: (JLorg/graphlab/Updater;)V */ JNIEXPORT void JNICALL Java_org_graphlab_Core_scheduleAll (JNIEnv *, jobject, jlong, jobject); /* * Class: org_graphlab_Core * Method: start * Signature: (J)D */ JNIEXPORT jdouble JNICALL Java_org_graphlab_Core_start (JNIEnv *, jobject, jlong); /* * Class: org_graphlab_Core * Method: lastUpdateCount * Signature: (J)J */ JNIEXPORT jlong JNICALL Java_org_graphlab_Core_lastUpdateCount (JNIEnv *, jobject, jlong); /* * Class: org_graphlab_Core * Method: addGlobalConst * Signature: (JLjava/lang/String;Ljava/lang/Object;)V */ JNIEXPORT void JNICALL Java_org_graphlab_Core_addGlobalConst (JNIEnv *, jobject, jlong, jstring, jobject); /* * Class: org_graphlab_Core * Method: addGlobal * Signature: (JLjava/lang/String;Ljava/lang/Object;)V */ JNIEXPORT void JNICALL Java_org_graphlab_Core_addGlobal (JNIEnv *, jobject, jlong, jstring, jobject); /* * Class: org_graphlab_Core * Method: getGlobal * Signature: (JLjava/lang/String;)Ljava/lang/Object; */ JNIEXPORT jobject JNICALL Java_org_graphlab_Core_getGlobal (JNIEnv *, jobject, jlong, jstring); /* * Class: org_graphlab_Core * Method: setGlobal * Signature: (JLjava/lang/String;Ljava/lang/Object;)V */ JNIEXPORT void JNICALL Java_org_graphlab_Core_setGlobal (JNIEnv *, jobject, jlong, jstring, jobject); /* * Class: org_graphlab_Core * Method: addAggregator * 
Signature: (JLjava/lang/String;Lorg/graphlab/Aggregator;J)V */ JNIEXPORT void JNICALL Java_org_graphlab_Core_addAggregator (JNIEnv *, jobject, jlong, jstring, jobject, jlong); /* * Class: org_graphlab_Core * Method: aggregateNow * Signature: (JLjava/lang/String;)V */ JNIEXPORT void JNICALL Java_org_graphlab_Core_aggregateNow (JNIEnv *, jobject, jlong, jstring); /* * Class: org_graphlab_Core * Method: setNCpus * Signature: (JJ)V */ JNIEXPORT void JNICALL Java_org_graphlab_Core_setNCpus (JNIEnv *, jobject, jlong, jlong); /* * Class: org_graphlab_Core * Method: setSchedulerType * Signature: (JLjava/lang/String;)V */ JNIEXPORT void JNICALL Java_org_graphlab_Core_setSchedulerType (JNIEnv *, jobject, jlong, jstring); /* * Class: org_graphlab_Core * Method: setScopeType * Signature: (JLjava/lang/String;)V */ JNIEXPORT void JNICALL Java_org_graphlab_Core_setScopeType (JNIEnv *, jobject, jlong, jstring); #ifdef __cplusplus } #endif #endif ================================================ FILE: src/graphlab/jni/org_graphlab_Core.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * @file org_graphlab_Core.hpp * \c javah will generate \c org_graphlab_Core.h from the native methods * defined in \c org.graphlab.Context (and so will overwrite the file every time). 
 * Define any additional classes/structs/typedefs in this hpp file.
 * @author Jiunn Haur Lim <jiunnhal@cmu.edu>
 */

#ifndef ORG_GRAPHLAB_CORE_HPP
#define ORG_GRAPHLAB_CORE_HPP

#include <execinfo.h>

#include <graphlab.hpp>

#include "java_any.hpp"
#include "org_graphlab_Core.h"

namespace graphlab {

  /**
   * Wrapper for graphlab::core.
   * Contains the core, a reference to the Java core object (so that it
   * doesn't get garbage collected), and other utility functions for dealing
   * with the JVM.
   */
  template <typename Graph, typename UpdateFunctor>
  class jni_core : public java_any {

  public:

    /** ID of pointer to JNI environment in thread local store */
    static const size_t ENV_ID;

  private:

    typedef core<Graph, UpdateFunctor> core_type;

    /** graphlab::core object - the soul that this body wraps around */
    core_type *mcore;

    /** Java virtual machine reference - set only once for each process */
    static JavaVM *mjvm;

  public:

    /**
     * Creates a new graphlab core and a new reference to the associated
     * Java org.graphlab.Core object (so that it doesn't get garbage
     * collected.)
     * @param[in] env   JNI environment, which will be used to create the
     *                  reference to the Java object.
     * @param[in] obj   associated org.graphlab.Core object.
     */
    jni_core (JNIEnv *env, jobject &obj) : java_any (env, obj) {
      this->mcore = new core_type();
    }

    /**
     * Gets the real graphlab core that this object wraps around.
     * @return graphlab::core
     */
    core_type &operator()(){
      return *mcore;
    }

    /**
     * Deallocates the graphlab core. Parent destructor will delete the
     * jobject reference.
     */
    ~jni_core(){
      delete mcore;
    }

    /**
     * Saves a reference to the Java Virtual Machine.
     * @param[in] jvm   pointer to the Java Virtual Machine
     */
    static void set_jvm (JavaVM *jvm){
      mjvm = jvm;
    }

    /**
     * Gets a reference to the Java Virtual Machine.
     * @return pointer to the Java Virtual Machine
     */
    static JavaVM *get_jvm (){
      return mjvm;
    }

    /**
     * Detaches the current thread from the JVM.
     * If a pointer to the JNI environment cannot be found in the
     * thread-local store, that means that this thread has already been
     * detached, and the function will return immediately. Otherwise, the
     * thread is detached and the pointer to the JNI environment is removed
     * from the thread-local store.
     */
    static void detach_from_jvm() {
      // if the current thread is still attached, detach it
      if (thread::contains(ENV_ID)) {
        int res = mjvm->DetachCurrentThread();
        assert(res >= 0);
        // clear the TLS slot so a later get_jni_env re-attaches
        thread::get_local(ENV_ID) = NULL;
      }
    }

    /**
     * Prints the current call stack (up to 10 frames) to stderr (fd 2).
     * NOTE(review): `sig` is unused; this signature matches a signal-handler
     * prototype, so this is presumably installed via signal() — confirm at
     * the call site.
     */
    static void dump_backtrace(int sig){
      void *array[10];
      size_t size;
      // get void*'s for all entries on the stack
      size = backtrace(array, 10);
      // print out all the frames to stderr
      backtrace_symbols_fd(array, size, 2);
    }

    /**
     * Convenience method for throwing Java exceptions.
     * Silently returns if the exception class cannot be found.
     */
    static void throw_exception(JNIEnv* env,
                                const char *exception,
                                const char *message){
      jclass exc = env->FindClass(exception);
      if (NULL == exc) return;
      env->ThrowNew(exc, message);
    }

    /**
     * Retrieves the JNI environment for the current thread.
     * If a pointer to the JNI environment can be found in the thread-local
     * store, returns immediately; otherwise, that means that the current
     * thread has not been attached to the JVM yet. In that case, this
     * function will attach the current thread to the JVM and save the
     * associated JNI environment to the thread-local storage.
     * @return JNI environment associated with the current thread.
     */
    static JNIEnv *get_jni_env (){

      JNIEnv *jenv = NULL;

      // if current thread is not already on the JVM, attach it
      if (!thread::contains(ENV_ID)) {
        int res = mjvm->AttachCurrentThread((void **)&jenv, NULL);
        assert(res >= 0);
        // store JNI environment in thread-local storage
        thread::get_local(ENV_ID) = jenv;
        // ensure the thread detaches cleanly before it exits
        thread::set_thread_destroy_callback(detach_from_jvm);
      }

      // return the environment associated with the current thread
      return thread::get_local(ENV_ID).as<JNIEnv *>();

    }

  };

}

#endif



================================================ FILE: src/graphlab/jni/org_graphlab_Updater.cpp ================================================

/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#include "org_graphlab_Updater.hpp"

using namespace graphlab;

//---------------------------------------------------------------
// proxy_updater static members
//---------------------------------------------------------------

// Cached jmethodIDs of the org.graphlab.Updater callbacks; populated once
// by Java_org_graphlab_Updater_initNative below.
jmethodID proxy_updater::java_update = 0;
jmethodID proxy_updater::java_add = 0;
jmethodID proxy_updater::java_priority = 0;
jmethodID proxy_updater::java_clone = 0;
jmethodID proxy_updater::java_is_factorizable = 0;
jmethodID proxy_updater::java_gather_edges = 0;
jmethodID proxy_updater::java_scatter_edges = 0;
jmethodID proxy_updater::java_consistency = 0;
jmethodID proxy_updater::java_gather_consistency = 0;
jmethodID proxy_updater::java_scatter_consistency = 0;
jmethodID proxy_updater::java_init_gather = 0;
jmethodID proxy_updater::java_gather = 0;
jmethodID proxy_updater::java_merge = 0;
jmethodID proxy_updater::java_apply = 0;
jmethodID proxy_updater::java_scatter = 0;

/**
 * Looks up and caches the method IDs of the org.graphlab.Updater callbacks.
 * The JNI signature strings must match the Java method declarations exactly.
 */
JNIEXPORT void JNICALL
Java_org_graphlab_Updater_initNative
(JNIEnv *env, jclass clazz){

  proxy_updater::java_update =
    env->GetMethodID(clazz, "update", "(JLorg/graphlab/data/Vertex;)V");
  proxy_updater::java_add =
    env->GetMethodID(clazz, "add", "(Lorg/graphlab/Updater;)V");
  proxy_updater::java_priority =
    env->GetMethodID(clazz, "priority", "()D");
  proxy_updater::java_clone =
    env->GetMethodID(clazz, "clone", "()Lorg/graphlab/Updater;");
  proxy_updater::java_is_factorizable =
    env->GetMethodID(clazz, "isFactorizable", "()Z");
  proxy_updater::java_gather_edges =
    env->GetMethodID(clazz, "gatherEdges", "()I");
  proxy_updater::java_scatter_edges =
    env->GetMethodID(clazz, "scatterEdges", "()I");
  proxy_updater::java_consistency =
    env->GetMethodID(clazz, "consistency", "()I");
  proxy_updater::java_gather_consistency =
    env->GetMethodID(clazz, "gatherConsistency", "()I");
  proxy_updater::java_scatter_consistency =
    env->GetMethodID(clazz, "scatterConsistency", "()I");
  proxy_updater::java_init_gather =
    env->GetMethodID(clazz, "initGather", "()V");
  proxy_updater::java_gather =
    env->GetMethodID(clazz, "gather", "(Ljava/lang/Object;)V");
  proxy_updater::java_merge =
    env->GetMethodID(clazz, "merge", "(Lorg/graphlab/Updater;)V");
  proxy_updater::java_apply =
    env->GetMethodID(clazz, "apply", "(Lorg/graphlab/data/Vertex;)V");
  proxy_updater::java_scatter =
    env->GetMethodID(clazz, "scatter", "(JLjava/lang/Object;)V");

}

//---------------------------------------------------------------
// proxy_updater instance members
//---------------------------------------------------------------

proxy_updater::
proxy_updater(JNIEnv *env, jobject &obj)
  : java_any(env, obj){}

proxy_updater::proxy_updater(){}

proxy_updater::
proxy_updater(const proxy_updater& other){
  // other doesn't have an existing ref
  if (NULL == other.obj()){
    set_obj(NULL);
    return;
  }
  // clone the java object (calls org.graphlab.Updater#clone)
  JNIEnv *env = core::get_jni_env();
  set_obj(env->CallObjectMethod(other.obj(), java_clone));
}

proxy_updater &proxy_updater::operator=(const proxy_updater& other){
  if (this == &other) return *this;
  // java_any::operator= manages the underlying jobject reference
  java_any::operator=(other);
  return *this;
}

proxy_updater::~proxy_updater(){}

//---------------------------------------------------------------
// proxy_updater instance members - the update function
//---------------------------------------------------------------

void proxy_updater::operator()(icontext_type& context){

  jobject vertex = context.const_vertex_data().obj();
  // BUG? silently skips vertices with no associated Java object —
  // TODO confirm this is intentional rather than masking an error.
  if (NULL == vertex) return;
// forward call to org.graphlab.Updater#update JNIEnv *env = core::get_jni_env(); env->CallVoidMethod (obj(), java_update, &context, vertex); handle_exception(env); } //--------------------------------------------------------------- // proxy_updater instance members - the add function //--------------------------------------------------------------- void proxy_updater::operator+=(const proxy_updater& other) const { // forward call to org.graphlab.Updater#add JNIEnv *env = core::get_jni_env(); env->CallVoidMethod (obj(), java_add, other.obj()); handle_exception(env); } bool proxy_updater::is_factorizable() const { JNIEnv *env = core::get_jni_env(); bool factorizable = env->CallBooleanMethod(obj(), java_is_factorizable); handle_exception(env); return factorizable; } edge_set proxy_updater::gather_edges() const { JNIEnv *env = core::get_jni_env(); int e = env->CallIntMethod(obj(), java_gather_edges); handle_exception(env); switch(e){ case 0: return IN_EDGES; case 1: return OUT_EDGES; case 2: return ALL_EDGES; default: return NO_EDGES; } } edge_set proxy_updater::scatter_edges() const { JNIEnv *env = core::get_jni_env(); int e = env->CallIntMethod(obj(), java_scatter_edges); handle_exception(env); switch(e){ case 0: return IN_EDGES; case 1: return OUT_EDGES; case 2: return ALL_EDGES; default: return NO_EDGES; } } consistency_model proxy_updater::consistency() const { JNIEnv *env = core::get_jni_env(); int c = env->CallIntMethod(obj(), java_consistency); handle_exception(env); switch(c){ case 0: return NULL_CONSISTENCY; case 1: return VERTEX_CONSISTENCY; case 2: return EDGE_CONSISTENCY; case 3: return FULL_CONSISTENCY; default: return DEFAULT_CONSISTENCY; } } consistency_model proxy_updater::gather_consistency() const { JNIEnv *env = core::get_jni_env(); int c = env->CallIntMethod(obj(), java_gather_consistency); handle_exception(env); switch(c){ case 0: return NULL_CONSISTENCY; case 1: return VERTEX_CONSISTENCY; case 2: return EDGE_CONSISTENCY; case 3: return 
FULL_CONSISTENCY; default: return DEFAULT_CONSISTENCY; } } consistency_model proxy_updater::scatter_consistency() const { JNIEnv *env = core::get_jni_env(); int c = env->CallIntMethod(obj(), java_scatter_consistency); handle_exception(env); switch(c){ case 0: return NULL_CONSISTENCY; case 1: return VERTEX_CONSISTENCY; case 2: return EDGE_CONSISTENCY; case 3: return FULL_CONSISTENCY; default: return DEFAULT_CONSISTENCY; } } void proxy_updater::init_gather(icontext_type& context) { JNIEnv *env = core::get_jni_env(); env->CallVoidMethod(obj(), java_init_gather); handle_exception(env); } void proxy_updater::gather(icontext_type& context, const edge_type& edge){ JNIEnv *env = core::get_jni_env(); env->CallVoidMethod(obj(), java_gather, context.const_edge_data(edge).obj()); handle_exception(env); } void proxy_updater::merge(const update_functor_type& other){ JNIEnv *env = core::get_jni_env(); env->CallVoidMethod(obj(), java_merge, other.obj()); handle_exception(env); } void proxy_updater::apply(icontext_type& context){ jobject vertex = context.const_vertex_data().obj(); if (NULL == vertex) return; // BUG? 
JNIEnv *env = core::get_jni_env(); env->CallVoidMethod(obj(), java_apply, vertex); handle_exception(env); } void proxy_updater::scatter(icontext_type& context, const edge_type& edge){ JNIEnv *env = core::get_jni_env(); env->CallVoidMethod(obj(), java_scatter, &context, context.const_edge_data(edge).obj()); handle_exception(env); } ================================================ FILE: src/graphlab/jni/org_graphlab_Updater.h ================================================ /* DO NOT EDIT THIS FILE - it is machine generated */ #include <jni.h> /* Header for class org_graphlab_Updater */ #ifndef _Included_org_graphlab_Updater #define _Included_org_graphlab_Updater #ifdef __cplusplus extern "C" { #endif #undef org_graphlab_Updater_IN_EDGES #define org_graphlab_Updater_IN_EDGES 0L #undef org_graphlab_Updater_OUT_EDGES #define org_graphlab_Updater_OUT_EDGES 1L #undef org_graphlab_Updater_ALL_EDGES #define org_graphlab_Updater_ALL_EDGES 2L #undef org_graphlab_Updater_NO_EDGES #define org_graphlab_Updater_NO_EDGES 3L #undef org_graphlab_Updater_NULL_CONSISTENCY #define org_graphlab_Updater_NULL_CONSISTENCY 0L #undef org_graphlab_Updater_VERTEX_CONSISTENCY #define org_graphlab_Updater_VERTEX_CONSISTENCY 1L #undef org_graphlab_Updater_EDGE_CONSISTENCY #define org_graphlab_Updater_EDGE_CONSISTENCY 2L #undef org_graphlab_Updater_FULL_CONSISTENCY #define org_graphlab_Updater_FULL_CONSISTENCY 3L #undef org_graphlab_Updater_DEFAULT_CONSISTENCY #define org_graphlab_Updater_DEFAULT_CONSISTENCY 4L /* * Class: org_graphlab_Updater * Method: initNative * Signature: ()V */ JNIEXPORT void JNICALL Java_org_graphlab_Updater_initNative (JNIEnv *, jclass); #ifdef __cplusplus } #endif #endif ================================================ FILE: src/graphlab/jni/org_graphlab_Updater.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

/**
 * @file org_graphlab_Updater.hpp
 * @author Jiunn Haur Lim <jiunnhal@cmu.edu>
 */

#ifndef ORG_GRAPHLAB_UPDATER_HPP
#define ORG_GRAPHLAB_UPDATER_HPP

#include <graphlab.hpp>
#include "java_any.hpp"
#include "org_graphlab_Core.hpp"
#include "org_graphlab_Updater.h"

namespace graphlab {

  /** Proxy edge: holds a reference to the corresponding Java Edge object. */
  class proxy_edge : public java_any {
  public:
    /**
     * Creates a new proxy_edge and a new reference to the associated
     * Java Edge object (so that it doesn't get garbage collected.)
     * @param[in] env   JNI environment, which will be used to create the
     *                  reference to the Java object.
     * @param[in] obj   associated Java edge object.
     */
    proxy_edge (JNIEnv *env, jobject &obj) : java_any (env, obj) {}
  };

  /** Proxy vertex: holds a reference to the corresponding Java Vertex. */
  class proxy_vertex : public java_any {
  public:
    /** Default constructor: no associated Java object yet. */
    proxy_vertex () : java_any () {}
    /**
     * Creates a new proxy_vertex and a new reference to the associated
     * Java Vertex object (so that it doesn't get garbage collected.)
     * @param[in] env   JNI environment, which will be used to create the
     *                  reference to the Java object.
     * @param[in] obj   associated Java vertex object.
     */
    proxy_vertex (JNIEnv *env, jobject &obj) : java_any (env, obj) {}
  };

  /** Proxy graph: a graphlab graph over proxy vertices and edges. */
  typedef graph<proxy_vertex, proxy_edge> proxy_graph;

  /**
   * Proxy updater.
   * Mirrors and forwards update calls to the corresponding Java updater.
   * The constructor creates a new reference to the Java object (so that it
   * doesn't get garbage collected.) The destructor will delete the reference
   * to allow the corresponding Java object to be garbage collected. The copy
   * constructor clones the Java object.
   *
   * Note that multiple proxy_updaters may correspond to the same
   * org.graphlab.Updater object.
   */
  class proxy_updater :
    public iupdate_functor<proxy_graph, proxy_updater>,
    public java_any {

  public:

    /** jni_core type that uses the proxy graph and the proxy updater */
    typedef jni_core<proxy_graph, proxy_updater> core;

    /** context type that uses the proxy graph and the proxy updater */
    typedef iupdate_functor<proxy_graph, proxy_updater>::icontext_type context;

    /** Method ID of org.graphlab.Updater#update */
    static jmethodID java_update;

    /** Method ID of org.graphlab.Updater#add */
    static jmethodID java_add;

    /** Method ID of org.graphlab.Updater#priority */
    static jmethodID java_priority;

    /** Method ID of org.graphlab.Updater#clone */
    static jmethodID java_clone;

    /** Method ID of org.graphlab.Updater#isFactorizable */
    static jmethodID java_is_factorizable;

    /** Method ID of org.graphlab.Updater#gatherEdges */
    static jmethodID java_gather_edges;

    /** Method ID of org.graphlab.Updater#scatterEdges */
    static jmethodID java_scatter_edges;

    /** Method ID of org.graphlab.Updater#consistency */
    static jmethodID java_consistency;

    /** Method ID of org.graphlab.Updater#gatherConsistency */
    static jmethodID java_gather_consistency;

    /** Method ID of org.graphlab.Updater#scatterConsistency */
    static jmethodID java_scatter_consistency;

    /** Method ID of org.graphlab.Updater#initGather */
    static jmethodID java_init_gather;

    /** Method ID of org.graphlab.Updater#gather */
    static jmethodID java_gather;

    /** Method ID of org.graphlab.Updater#merge */
    static jmethodID java_merge;

    /** Method ID of org.graphlab.Updater#apply */
    static jmethodID java_apply;

    /** Method ID of org.graphlab.Updater#scatter */
    static jmethodID java_scatter;

    /**
     * Constructor for proxy updater.
     * Initializes this object with the associated Java org.graphlab.Updater
     * object.
     * @param[in] env           JNI environment - used to create a new
     *                          reference to javaUpdater.
     * @param[in] java_updater  Java org.graphlab.Updater object. This
     *                          constructor will create a new reference to
     *                          the object to prevent garbage collection.
     */
    proxy_updater(JNIEnv *env, jobject &java_updater);

    /** The default constructor does nothing */
    proxy_updater();

    /**
     * Copy constructor for proxy_updater.
     * If \c other has a \c mjava_updater, creates a new reference to it.
     */
    proxy_updater(const proxy_updater& other);

    /**
     * Copy assignment operator for proxy_updater.
     * If \c other has a \c mjava_updater, creates a new reference to it.
     */
    proxy_updater &operator=(const proxy_updater &other);

    /**
     * Deletes the reference to the Java object so that it may be garbage
     * collected.
     */
    ~proxy_updater();

    // iupdate_functor interface; each call is forwarded to the
    // corresponding org.graphlab.Updater method (see the .cpp).
    void operator()(icontext_type& context);
    void operator+=(const update_functor_type& other) const;
    bool is_factorizable() const;
    edge_set gather_edges() const;
    edge_set scatter_edges() const;
    consistency_model consistency() const;
    consistency_model gather_consistency() const;
    consistency_model scatter_consistency() const;
    void init_gather(icontext_type& context);
    void gather(icontext_type& context, const edge_type& edge);
    void merge(const update_functor_type& other);
    void apply(icontext_type& context);
    void scatter(icontext_type& context, const edge_type& edge);

  };

}

#endif



================================================ FILE: src/graphlab/logger/CMakeLists.txt ================================================

project(GraphLab)



================================================ FILE: src/graphlab/logger/assertions.hpp ================================================

/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ // Copyright (c) 2005, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // --- // This file contains #include information about logging-related stuff. // Pretty much everybody needs to #include this file so that they can // log various happenings. // #ifndef _ASSERTIONS_H_ #define _ASSERTIONS_H_ #include <stdarg.h> #include <stdlib.h> #include <stdio.h> #ifdef HAVE_UNISTD_H #include <unistd.h> // for write() #endif #include <string.h> // for strlen(), strcmp() #include <assert.h> #include <errno.h> // for errno #include <sstream> #include <cassert> #include <graphlab/logger/logger.hpp> #include <graphlab/logger/fail_method.hpp> #include <graphlab/logger/backtrace.hpp> #include <boost/typeof/typeof.hpp> extern void __print_back_trace(); // On some systems (like freebsd), we can't call write() at all in a // global constructor, perhaps because errno hasn't been set up. // Calling the write syscall is safer (it doesn't set errno), so we // prefer that. Note we don't care about errno for logging: we just // do logging on a best-effort basis. #define WRITE_TO_STDERR(buf, len) (logbuf(LOG_FATAL, buf, len)) // CHECK dies with a fatal error if condition is not true. It is *not* // controlled by NDEBUG, so the check will be executed regardless of // compilation mode. 
Therefore, it is safe to do things like: // CHECK(fp->Write(x) == 4) #define CHECK(condition) \ do { \ if (__builtin_expect(!(condition), 0)) { \ logstream(LOG_ERROR) \ << "Check failed: " << #condition << std::endl; \ __print_back_trace(); \ GRAPHLAB_LOGGER_FAIL_METHOD("assertion failure"); \ } \ } while(0) // This prints errno as well. errno is the posix defined last error // number. See errno.h #define PCHECK(condition) \ do { \ if (__builtin_expect(!(condition), 0)) { \ const int _PCHECK_err_no_ = errno; \ logstream(LOG_ERROR) \ << "Check failed: " << #condition << ": " \ << strerror(err_no) << std::endl; \ __print_back_trace(); \ GRAPHLAB_LOGGER_FAIL_METHOD("assertion failure"); \ } \ } while(0) // Helper macro for binary operators; prints the two values on error // Don't use this macro directly in your code, use CHECK_EQ et al below // WARNING: These don't compile correctly if one of the arguments is a pointer // and the other is NULL. To work around this, simply static_cast NULL to the // type of the desired pointer. 
#if defined(__cplusplus) && __cplusplus >= 201103L #define CHECK_OP(op, val1, val2) \ do { \ const auto _CHECK_OP_v1_ = val1; \ const auto _CHECK_OP_v2_ = val2; \ if (__builtin_expect(!((_CHECK_OP_v1_) op \ (decltype(val1))(_CHECK_OP_v2_)), 0)) { \ logstream(LOG_ERROR) \ << "Check failed: " \ << #val1 << #op << #val2 \ << " [" \ << _CHECK_OP_v1_ \ << ' ' << #op << ' ' \ << _CHECK_OP_v2_ << "]" << std::endl; \ __print_back_trace(); \ GRAPHLAB_LOGGER_FAIL_METHOD("assertion failure"); \ } \ } while(0) #else #define CHECK_OP(op, val1, val2) \ do { \ const typeof(val1) _CHECK_OP_v1_ = (typeof(val1))val1; \ const typeof(val2) _CHECK_OP_v2_ = (typeof(val2))val2; \ if (__builtin_expect(!((_CHECK_OP_v1_) op \ (typeof(val1))(_CHECK_OP_v2_)), 0)) { \ logstream(LOG_ERROR) \ << "Check failed: " \ << #val1 << #op << #val2 \ << " [" \ << _CHECK_OP_v1_ \ << ' ' << #op << ' ' \ << _CHECK_OP_v2_ << "]" << std::endl; \ __print_back_trace(); \ GRAPHLAB_LOGGER_FAIL_METHOD("assertion failure"); \ } \ } while(0) #endif #define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2) #define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2) #define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2) #define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2) #define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2) #define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2) // Synonyms for CHECK_* that are used in some unittests. 
#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2)
#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2)
#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2)
#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2)
#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2)
#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2)
#define ASSERT_EQ(val1, val2) EXPECT_EQ(val1, val2)
#define ASSERT_NE(val1, val2) EXPECT_NE(val1, val2)
#define ASSERT_LE(val1, val2) EXPECT_LE(val1, val2)
#define ASSERT_LT(val1, val2) EXPECT_LT(val1, val2)
#define ASSERT_GE(val1, val2) EXPECT_GE(val1, val2)
#define ASSERT_GT(val1, val2) EXPECT_GT(val1, val2)

// As are these variants.
#define EXPECT_TRUE(cond)     CHECK(cond)
#define EXPECT_FALSE(cond)    CHECK(!(cond))
#define EXPECT_STREQ(a, b)    CHECK(strcmp(a, b) == 0)
#define ASSERT_TRUE(cond)     EXPECT_TRUE(cond)
#define ASSERT_FALSE(cond)    EXPECT_FALSE(cond)
#define ASSERT_STREQ(a, b)    EXPECT_STREQ(a, b)

// Like CHECK, but also logs a printf-style formatted message on failure.
#define ASSERT_MSG(condition, fmt, ...)                                 \
  do {                                                                  \
    if (__builtin_expect(!(condition), 0)) {                            \
      logstream(LOG_ERROR)                                              \
        << "Check failed: " << #condition << ":\n";                     \
      logger(LOG_ERROR, fmt, ##__VA_ARGS__);                            \
      __print_back_trace();                                             \
      GRAPHLAB_LOGGER_FAIL_METHOD("assertion failure");                 \
    }                                                                   \
  } while(0)

// Used for (libc) functions that return -1 and set errno
#define CHECK_ERR(invocation)  PCHECK((invocation) != -1)

// A few more checks that only happen in debug mode
#ifdef NDEBUG
// Release build: the D-variants expand to nothing (arguments NOT evaluated).
#define DCHECK_EQ(val1, val2)
#define DCHECK_NE(val1, val2)
#define DCHECK_LE(val1, val2)
#define DCHECK_LT(val1, val2)
#define DCHECK_GE(val1, val2)
#define DCHECK_GT(val1, val2)
#define DASSERT_TRUE(cond)
#define DASSERT_FALSE(cond)
#define DASSERT_EQ(val1, val2)
#define DASSERT_NE(val1, val2)
#define DASSERT_LE(val1, val2)
#define DASSERT_LT(val1, val2)
#define DASSERT_GE(val1, val2)
#define DASSERT_GT(val1, val2)
#define DASSERT_MSG(condition, fmt, ...)
#else
// Debug build: the D-variants are full checks.
#define DCHECK_EQ(val1, val2)  CHECK_EQ(val1, val2)
#define DCHECK_NE(val1, val2)  CHECK_NE(val1, val2)
#define DCHECK_LE(val1, val2)  CHECK_LE(val1, val2)
#define DCHECK_LT(val1, val2)  CHECK_LT(val1, val2)
#define DCHECK_GE(val1, val2)  CHECK_GE(val1, val2)
#define DCHECK_GT(val1, val2)  CHECK_GT(val1, val2)
#define DASSERT_TRUE(cond)     ASSERT_TRUE(cond)
#define DASSERT_FALSE(cond)    ASSERT_FALSE(cond)
#define DASSERT_EQ(val1, val2) ASSERT_EQ(val1, val2)
#define DASSERT_NE(val1, val2) ASSERT_NE(val1, val2)
#define DASSERT_LE(val1, val2) ASSERT_LE(val1, val2)
#define DASSERT_LT(val1, val2) ASSERT_LT(val1, val2)
#define DASSERT_GE(val1, val2) ASSERT_GE(val1, val2)
#define DASSERT_GT(val1, val2) ASSERT_GT(val1, val2)
#define DASSERT_MSG(condition, fmt, ...)                                \
  do {                                                                  \
    if (__builtin_expect(!(condition), 0)) {                            \
      logstream(LOG_ERROR)                                              \
        << "Check failed: " << #condition << ":\n";                     \
      logger(LOG_ERROR, fmt, ##__VA_ARGS__);                            \
      __print_back_trace();                                             \
      GRAPHLAB_LOGGER_FAIL_METHOD("assertion failure");                 \
    }                                                                   \
  } while(0)
#endif

#ifdef ERROR
#undef ERROR      // may conflict with ERROR macro on windows
#endif

#endif // _ASSERTIONS_H_



================================================ FILE: src/graphlab/logger/assertions.hpp.orig ================================================

/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

// Copyright (c) 2005, Google Inc.
// All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // --- // This file contains #include information about logging-related stuff. // Pretty much everybody needs to #include this file so that they can // log various happenings. 
// #ifndef _ASSERTIONS_H_ #define _ASSERTIONS_H_ #include <stdarg.h> #include <stdlib.h> #include <stdio.h> #ifdef HAVE_UNISTD_H #include <unistd.h> // for write() #endif #include <string.h> // for strlen(), strcmp() #include <assert.h> #include <errno.h> // for errno #include <sstream> #include <cassert> #include <graphlab/logger/logger.hpp> #include <boost/typeof/typeof.hpp> extern void __print_back_trace(); // On some systems (like freebsd), we can't call write() at all in a // global constructor, perhaps because errno hasn't been set up. // Calling the write syscall is safer (it doesn't set errno), so we // prefer that. Note we don't care about errno for logging: we just // do logging on a best-effort basis. #define WRITE_TO_STDERR(buf, len) (logbuf(LOG_FATAL, buf, len)) // CHECK dies with a fatal error if condition is not true. It is *not* // controlled by NDEBUG, so the check will be executed regardless of // compilation mode. Therefore, it is safe to do things like: // CHECK(fp->Write(x) == 4) #define CHECK(condition) \ do { \ if (__builtin_expect(!(condition), 0)) { \ logstream(LOG_ERROR) \ << "Check failed: " << #condition << std::endl; \ __print_back_trace(); \ throw("assertion failure"); \ } \ } while(0) // This prints errno as well. errno is the posix defined last error // number. See errno.h #define PCHECK(condition) \ do { \ if (__builtin_expect(!(condition), 0)) { \ const int _PCHECK_err_no_ = errno; \ logstream(LOG_ERROR) \ << "Check failed: " << #condition << ": " \ << strerror(err_no) << std::endl; \ __print_back_trace(); \ throw("assertion failure"); \ } \ } while(0) // Helper macro for binary operators; prints the two values on error // Don't use this macro directly in your code, use CHECK_EQ et al below // WARNING: These don't compile correctly if one of the arguments is a pointer // and the other is NULL. To work around this, simply static_cast NULL to the // type of the desired pointer. 
// C++11 path: 'auto' locals make each argument expand and evaluate exactly
// once, so CHECK_EQ(f(), g()) never double-invokes side effects.
#if defined(__cplusplus) && __cplusplus >= 201103L
#define CHECK_OP(op, val1, val2)                                      \
  do {                                                                \
    const auto _CHECK_OP_v1_ = val1;                                  \
    const auto _CHECK_OP_v2_ = val2;                                  \
    if (__builtin_expect(!((_CHECK_OP_v1_) op                         \
                           (decltype(val1))(_CHECK_OP_v2_)), 0)) {    \
      logstream(LOG_ERROR)                                            \
        << "Check failed: "                                           \
        << #val1 << #op << #val2                                      \
        << " ["                                                       \
        << _CHECK_OP_v1_                                              \
        << ' ' << #op << ' '                                          \
        << _CHECK_OP_v2_ << "]" << std::endl;                         \
      __print_back_trace();                                           \
      throw("assertion failure");                                     \
    }                                                                 \
  } while(0)
#else
// Pre-C++11 fallback: GCC's typeof extension plays the role of decltype.
// The second operand is cast to typeof(val1) to silence signed/unsigned
// comparison warnings, mirroring the C++11 branch above.
#define CHECK_OP(op, val1, val2)                                      \
  do {                                                                \
    const typeof(val1) _CHECK_OP_v1_ = (typeof(val1))val1;            \
    const typeof(val2) _CHECK_OP_v2_ = (typeof(val2))val2;            \
    if (__builtin_expect(!((_CHECK_OP_v1_) op                         \
                           (typeof(val1))(_CHECK_OP_v2_)), 0)) {      \
      logstream(LOG_ERROR)                                            \
        << "Check failed: "                                           \
        << #val1 << #op << #val2                                      \
        << " ["                                                       \
        << _CHECK_OP_v1_                                              \
        << ' ' << #op << ' '                                          \
        << _CHECK_OP_v2_ << "]" << std::endl;                         \
      __print_back_trace();                                           \
      throw("assertion failure");                                     \
    }                                                                 \
  } while(0)
#endif

// Fatal comparison checks, active in every build mode (the debug-only
// variants are the DCHECK_* macros further below).
#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2)
#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2)
#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2)
#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2)
#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2)
#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2)

// Synonyms for CHECK_* that are used in some unittests.
#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2)
#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2)
#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2)
#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2)
#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2)
#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2)
#define ASSERT_EQ(val1, val2) EXPECT_EQ(val1, val2)
#define ASSERT_NE(val1, val2) EXPECT_NE(val1, val2)
#define ASSERT_LE(val1, val2) EXPECT_LE(val1, val2)
#define ASSERT_LT(val1, val2) EXPECT_LT(val1, val2)
#define ASSERT_GE(val1, val2) EXPECT_GE(val1, val2)
#define ASSERT_GT(val1, val2) EXPECT_GT(val1, val2)
// As are these variants.
// Boolean and C-string variants of the checks above.
#define EXPECT_TRUE(cond) CHECK(cond)
#define EXPECT_FALSE(cond) CHECK(!(cond))
#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0)
#define ASSERT_TRUE(cond) EXPECT_TRUE(cond)
#define ASSERT_FALSE(cond) EXPECT_FALSE(cond)
#define ASSERT_STREQ(a, b) EXPECT_STREQ(a, b)

// Like ASSERT_TRUE, but additionally logs a printf-style message on failure.
#define ASSERT_MSG(condition, fmt, ...)                         \
  do {                                                          \
    if (__builtin_expect(!(condition), 0)) {                    \
      logstream(LOG_ERROR)                                      \
        << "Check failed: " << #condition << ":\n";             \
      logger(LOG_ERROR, fmt, ##__VA_ARGS__);                    \
      __print_back_trace();                                     \
      throw("assertion failure");                               \
    }                                                           \
  } while(0)

// Used for (libc) functions that return -1 and set errno
#define CHECK_ERR(invocation)  PCHECK((invocation) != -1)

// A few more checks that only happen in debug mode
#ifdef NDEBUG
// In NDEBUG (release) builds the debug checks expand to nothing,
// so their arguments are not evaluated at all.
#define DCHECK_EQ(val1, val2)
#define DCHECK_NE(val1, val2)
#define DCHECK_LE(val1, val2)
#define DCHECK_LT(val1, val2)
#define DCHECK_GE(val1, val2)
#define DCHECK_GT(val1, val2)
#define DASSERT_TRUE(cond)
#define DASSERT_FALSE(cond)
#define DASSERT_MSG(condition, fmt, ...)
#else
// Debug builds: forward to the always-on CHECK/ASSERT variants.
#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2)
#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2)
#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2)
#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2)
#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2)
#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2)
#define DASSERT_TRUE(cond) ASSERT_TRUE(cond)
#define DASSERT_FALSE(cond) ASSERT_FALSE(cond)
#define DASSERT_MSG(condition, fmt, ...)                        \
  do {                                                          \
    if (__builtin_expect(!(condition), 0)) {                    \
      logstream(LOG_ERROR)                                      \
        << "Check failed: " << #condition << ":\n";             \
      logger(LOG_ERROR, fmt, ##__VA_ARGS__);                    \
      __print_back_trace();                                     \
      throw("assertion failure");                               \
    }                                                           \
  } while(0)
#endif

#ifdef ERROR
#undef ERROR      // may conflict with ERROR macro on windows
#endif

#endif  // _ASSERTIONS_H_

================================================
FILE: src/graphlab/logger/backtrace.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <execinfo.h> #include <iostream> #include <stdlib.h> #include <stdio.h> #include <string.h> #include <unistd.h> #include <cxxabi.h> #include <pthread.h> /** Code from http://mykospark.net/2009/09/runtime-backtrace-in-c-with-name-demangling/ */ std::string demangle(const char* symbol) { size_t size; int status; char temp[1024]; char* demangled; //first, try to demangle a c++ name if (1 == sscanf(symbol, "%*[^(]%*[^_]%127[^)+]", temp)) { if (NULL != (demangled = abi::__cxa_demangle(temp, NULL, &size, &status))) { std::string result(demangled); free(demangled); return result; } } //if that didn't work, try to get a regular c symbol if (1 == sscanf(symbol, "%127s", temp)) { return temp; } //if all else fails, just return the symbol return symbol; } static pthread_mutex_t back_trace_file_lock = PTHREAD_MUTEX_INITIALIZER; static size_t write_count = 0; static bool write_error = 0; static int backtrace_file_number = 0; extern void __set_back_trace_file_number(int number) { backtrace_file_number = number; } /* Obtain a backtrace and print it to ofile. 
*/ void __print_back_trace() { void *array[1024]; size_t size, i; char **strings; pthread_mutex_lock(&back_trace_file_lock); if (write_error) { pthread_mutex_unlock(&back_trace_file_lock); return; } char filename[1024]; sprintf(filename, "backtrace.%d", backtrace_file_number); FILE* ofile = NULL; if (write_count == 0) { ofile = fopen(filename, "w"); } else { ofile = fopen(filename, "a"); } // if unable to open the file for output if (ofile == NULL) { // print an error, set the error flag so we don't ever print it again fprintf(stderr, "Unable to open output backtrace file.\n"); write_error = 1; pthread_mutex_unlock(&back_trace_file_lock); return; } ++write_count; size = backtrace(array, 1024); strings = backtrace_symbols(array, size); fprintf(ofile, "Pointers\n"); fprintf(ofile, "------------\n"); for (i = 0; i < size; ++i) { fprintf(ofile, "%p\n", array[i]); } fprintf(ofile, "Raw\n"); fprintf(ofile, "------------\n"); for (i = 0; i < size; ++i) { fprintf(ofile, "%s\n", strings[i]); } fprintf(ofile, "\nDemangled\n"); fprintf(ofile, "------------\n"); for (i = 0; i < size; ++i) { std::string ret = demangle(strings[i]); fprintf(ofile, "%s\n", ret.c_str()); } free(strings); fprintf(ofile, "-------------------------------------------------------\n"); fprintf(ofile, "\n\n"); fclose(ofile); pthread_mutex_unlock(&back_trace_file_lock); } ================================================ FILE: src/graphlab/logger/backtrace.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_BACKTRACE_HPP #define GRAPHLAB_BACKTRACE_HPP extern void __set_back_trace_file_number(int number); extern void __print_back_trace(); #endif ================================================ FILE: src/graphlab/logger/fail_method.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_LOGGER_FAIL_METHOD #ifdef GRAPHLAB_LOGGER_THROW_ON_FAILURE #define GRAPHLAB_LOGGER_FAIL_METHOD(str) throw(str) #else #define GRAPHLAB_LOGGER_FAIL_METHOD(str) abort() #endif #endif ================================================ FILE: src/graphlab/logger/logger.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <graphlab/logger/logger.hpp> #include <cstdarg> #include <cstdio> #include <cstring> #include <fstream> #include <iostream> #include <pthread.h> #include <graphlab/logger/backtrace.hpp> file_logger& global_logger() { static file_logger l; return l; } void streambuffdestructor(void* v){ logger_impl::streambuff_tls_entry* t = reinterpret_cast<logger_impl::streambuff_tls_entry*>(v); delete t; } const char* messages[] = { "DEBUG: ", "DEBUG: ", "INFO: ", "INFO: ", "WARNING: ", "ERROR: ", "FATAL: "}; file_logger::file_logger() { log_file = ""; log_to_console = true; log_level = LOG_EMPH; pthread_mutex_init(&mut, NULL); pthread_key_create(&streambuffkey, streambuffdestructor); } file_logger::~file_logger() { if (fout.good()) { fout.flush(); fout.close(); } pthread_mutex_destroy(&mut); } bool file_logger::set_log_file(std::string file) { // close the file if it is open if (fout.good()) { fout.flush(); fout.close(); log_file = ""; } // if file is not an empty string, open the new file if (file.length() > 0) { fout.open(file.c_str()); if (fout.fail()) return false; log_file = file; } return true; } #define RESET 0 #define BRIGHT 1 #define DIM 2 #define UNDERLINE 3 #define BLINK 4 #define REVERSE 7 #define HIDDEN 8 #define BLACK 0 #define RED 1 #define GREEN 2 #define YELLOW 3 #define BLUE 4 #define MAGENTA 5 #define CYAN 6 #define WHITE 7 void 
textcolor(FILE* handle, int attr, int fg) { char command[13]; /* Command is the control command to the terminal */ sprintf(command, "%c[%d;%dm", 0x1B, attr, fg + 30); fprintf(handle, "%s", command); } void reset_color(FILE* handle) { char command[20]; /* Command is the control command to the terminal */ sprintf(command, "%c[0m", 0x1B); fprintf(handle, "%s", command); } void file_logger::_log(int lineloglevel,const char* file,const char* function, int line,const char* fmt, va_list ap ){ // if the logger level fits if (lineloglevel >= log_level){ // get just the filename. this line found on a forum on line. // claims to be from google. file = ((strrchr(file, '/') ? : file- 1) + 1); char str[1024]; // write the actual header int byteswritten = snprintf(str,1024, "%s%s(%s:%d): ", messages[lineloglevel],file,function,line); // write the actual logger byteswritten += vsnprintf(str + byteswritten,1024 - byteswritten,fmt,ap); str[byteswritten] = '\n'; str[byteswritten+1] = 0; // write the output if (fout.good()) { pthread_mutex_lock(&mut); fout << str;; pthread_mutex_unlock(&mut); } if (log_to_console) { #ifdef COLOROUTPUT if (lineloglevel == LOG_FATAL) { textcolor(stderr, BRIGHT, RED); } else if (lineloglevel == LOG_ERROR) { textcolor(stderr, BRIGHT, RED); } else if (lineloglevel == LOG_WARNING) { textcolor(stderr, BRIGHT, MAGENTA); } else if (lineloglevel == LOG_EMPH) { textcolor(stderr, BRIGHT, GREEN); } #endif std::cerr << str;; #ifdef COLOROUTPUT reset_color(stderr); #endif } } } void file_logger::_logbuf(int lineloglevel,const char* file,const char* function, int line,const char* buf, int len) { // if the logger level fits if (lineloglevel >= log_level){ // get just the filename. this line found on a forum on line. // claims to be from google. file = ((strrchr(file, '/') ? 
: file- 1) + 1); // length of the 'head' of the string size_t headerlen = snprintf(NULL,0,"%s%s(%s:%d): ", messages[lineloglevel],file,function,line); if (headerlen> 2047) { std::cerr << "Header length exceed buffer length!"; } else { char str[2048]; const char *newline="\n"; // write the actual header int byteswritten = snprintf(str,2047,"%s%s(%s:%d): ", messages[lineloglevel],file,function,line); _lograw(lineloglevel,str, byteswritten); _lograw(lineloglevel,buf, len); _lograw(lineloglevel,newline, (int)strlen(newline)); } } } void file_logger::_lograw(int lineloglevel, const char* buf, int len) { if (fout.good()) { pthread_mutex_lock(&mut); fout.write(buf,len); pthread_mutex_unlock(&mut); } if (log_to_console) { #ifdef COLOROUTPUT pthread_mutex_lock(&mut); if (lineloglevel == LOG_FATAL) { textcolor(stderr, BRIGHT, RED); } else if (lineloglevel == LOG_ERROR) { textcolor(stderr, BRIGHT, RED); } else if (lineloglevel == LOG_WARNING) { textcolor(stderr, BRIGHT, MAGENTA); } else if (lineloglevel == LOG_DEBUG) { textcolor(stderr, BRIGHT, YELLOW); } else if (lineloglevel == LOG_EMPH) { textcolor(stderr, BRIGHT, GREEN); } #endif std::cerr.write(buf,len); #ifdef COLOROUTPUT pthread_mutex_unlock(&mut); reset_color(stderr); #endif } } file_logger& file_logger::start_stream(int lineloglevel,const char* file, const char* function, int line, bool do_start) { // get the stream buffer logger_impl::streambuff_tls_entry* streambufentry = reinterpret_cast<logger_impl::streambuff_tls_entry*>( pthread_getspecific(streambuffkey)); // create the key if it doesn't exist if (streambufentry == NULL) { streambufentry = new logger_impl::streambuff_tls_entry; pthread_setspecific(streambuffkey, streambufentry); } std::stringstream& streambuffer = streambufentry->streambuffer; bool& streamactive = streambufentry->streamactive; if (lineloglevel >= log_level){ // get the stream buffer // if do not start the stream, just quit if (do_start == false) { streamactive = false; return *this; } file = 
((strrchr(file, '/') ? : file- 1) + 1); if (streambuffer.str().length() == 0) { streambuffer << messages[lineloglevel] << file << "(" << function << ":" <<line<<"): "; } streamactive = true; streamloglevel = lineloglevel; } else { streamactive = false; } return *this; } ================================================ FILE: src/graphlab/logger/logger.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * @file logger.hpp * Usage: * First include logger.hpp. To logger, use the logger() function * There are 2 output levels. A "soft" output level which is * set by calling global_logger.set_log_level(), as well as a "hard" output * level OUTPUTLEVEL which is set in the source code (logger.h). * * when you call "logger()" with a loglevel and if the loglevel is greater than * both of the output levels, the string will be written. * written to a logger file. Otherwise, logger() has no effect. * * The difference between the hard level and the soft level is that the * soft level can be changed at runtime, while the hard level optimizes away * logging calls at compile time. 
*/ #ifndef GRAPHLAB_LOG_LOG_HPP #define GRAPHLAB_LOG_LOG_HPP #include <fstream> #include <sstream> #include <cstdlib> #include <cassert> #include <cstring> #include <cstdarg> #include <pthread.h> #include <graphlab/util/timer.hpp> #include <graphlab/logger/fail_method.hpp> #include <graphlab/logger/backtrace.hpp> /** * \def LOG_FATAL * Used for fatal and probably irrecoverable conditions * \def LOG_ERROR * Used for errors which are recoverable within the scope of the function * \def LOG_WARNING * Logs interesting conditions which are probably not fatal * \def LOG_EMPH * Outputs as LOG_INFO, but in LOG_WARNING colors. Useful for * outputting information you want to emphasize. * \def LOG_INFO * Used for providing general useful information * \def LOG_DEBUG * Debugging purposes only * \def LOG_EVERYTHING * Log everything */ // sgr - needed additional debug levels. I can undo this change if // necessary. although it seems to me that log levels should count // up and saturate so the messages label array can always be used. #define LOG_NONE 7 #define LOG_FATAL 6 #define LOG_ERROR 5 #define LOG_WARNING 4 #define LOG_EMPH 3 #define LOG_INFO 2 #define LOG_DEBUG 1 #define LOG_EVERYTHING 0 // technically unsigned int /** * \def OUTPUTLEVEL * The minimum level to logger at * \def LOG_NONE * OUTPUTLEVEL to LOG_NONE to disable logging */ #ifndef OUTPUTLEVEL #define OUTPUTLEVEL LOG_DEBUG #endif /// If set, logs to screen will be printed in color #define COLOROUTPUT /** * \def logger(lvl,fmt,...) * extracts the filename, line number * and function name and calls _log. It will be optimized * away if LOG_NONE is set * This relies on a few compiler macros. As far as I know, these * macros are pretty standard among most other C++ compilers. */ #if OUTPUTLEVEL == LOG_NONE // totally disable logging #define logger(lvl,fmt,...) #define logbuf(lvl,fmt,...) #define logstream(lvl) if(0) null_stream() #define logger_once(lvl,fmt,...) 
#define logstream_once(lvl) if(0) null_stream() #define logger_ontick(sec,lvl,fmt,...) #define logstream_ontick(sec, lvl) if(0) null_stream() #else #define logger(lvl,fmt,...) \ (log_dispatch<(lvl >= OUTPUTLEVEL)>::exec(lvl,__FILE__, __func__ ,__LINE__,fmt,##__VA_ARGS__)) #define logbuf(lvl,buf,len) \ (log_dispatch<(lvl >= OUTPUTLEVEL)>::exec(lvl,__FILE__, \ __func__ ,__LINE__,buf,len)) #define logstream(lvl) \ if(lvl >= global_logger().get_log_level()) (log_stream_dispatch<(lvl >= OUTPUTLEVEL)>::exec(lvl,__FILE__, __func__ ,__LINE__) ) #define logger_once(lvl,fmt,...) \ { \ static bool __printed__ = false; \ if (!__printed__) { \ __printed__ = true; \ (log_dispatch<(lvl >= OUTPUTLEVEL)>::exec(lvl,__FILE__, __func__ ,__LINE__,fmt,##__VA_ARGS__)); \ } \ } #define logstream_once(lvl) \ (*({ \ static bool __printed__ = false; \ bool __prev_printed__ = __printed__; \ if (!__printed__) __printed__ = true; \ &(log_stream_dispatch<(lvl >= OUTPUTLEVEL)>::exec(lvl,__FILE__, __func__ ,__LINE__, !__prev_printed__) ); \ })) #define logger_ontick(sec,lvl,fmt,...) \ { \ static float last_print = -sec - 1; \ float curtime = graphlab::timer::approx_time_seconds(); \ if (last_print + sec <= curtime) { \ last_print = curtime; \ (log_dispatch<(lvl >= OUTPUTLEVEL)>::exec(lvl,__FILE__, __func__ ,__LINE__,fmt,##__VA_ARGS__)); \ } \ } #define logstream_ontick(sec,lvl) \ (*({ \ static float last_print = -sec - 1; \ float curtime = graphlab::timer::approx_time_seconds(); \ bool print_now = false; \ if (last_print + sec <= curtime) { \ last_print = curtime; \ print_now = true; \ } \ &(log_stream_dispatch<(lvl >= OUTPUTLEVEL)>::exec(lvl,__FILE__, __func__ ,__LINE__, print_now) ); \ })) #endif namespace logger_impl { struct streambuff_tls_entry { std::stringstream streambuffer; bool streamactive; }; } extern void __print_back_trace(); /** logging class. This writes to a file, and/or the system console. */ class file_logger{ public: /** Default constructor. 
By default, log_to_console is on, there is no logger file, and logger level is set to LOG_EMPH */ file_logger(); ~file_logger(); /// destructor. flushes and closes the current logger file /** Closes the current logger file if one exists. if 'file' is not an empty string, it will be opened and all subsequent logger output will be written into 'file'. Any existing content of 'file' will be cleared. Return true on success and false on failure. */ bool set_log_file(std::string file); /// If consolelog is true, subsequent logger output will be written to stderr void set_log_to_console(bool consolelog) { log_to_console = consolelog; } /// Returns the current logger file. std::string get_log_file(void) { return log_file; } /// Returns true if output is being written to stderr bool get_log_to_console() { return log_to_console; } /// Returns the current logger level int get_log_level() { return log_level; } file_logger& start_stream(int lineloglevel,const char* file,const char* function, int line, bool do_start = true); template <typename T> file_logger& operator<<(T a) { // get the stream buffer logger_impl::streambuff_tls_entry* streambufentry = reinterpret_cast<logger_impl::streambuff_tls_entry*>( pthread_getspecific(streambuffkey)); if (streambufentry != NULL) { std::stringstream& streambuffer = streambufentry->streambuffer; bool& streamactive = streambufentry->streamactive; if (streamactive) streambuffer << a; } return *this; } inline file_logger& operator<<(const char* a) { // get the stream buffer logger_impl::streambuff_tls_entry* streambufentry = reinterpret_cast<logger_impl::streambuff_tls_entry*>( pthread_getspecific(streambuffkey)); if (streambufentry != NULL) { std::stringstream& streambuffer = streambufentry->streambuffer; bool& streamactive = streambufentry->streamactive; if (streamactive) { streambuffer << a; if (a[strlen(a)-1] == '\n') { stream_flush(); } } } return *this; } inline file_logger& operator<<(std::ostream& (*f)(std::ostream&)){ // get the 
stream buffer logger_impl::streambuff_tls_entry* streambufentry = reinterpret_cast<logger_impl::streambuff_tls_entry*>( pthread_getspecific(streambuffkey)); if (streambufentry != NULL) { std::stringstream& streambuffer = streambufentry->streambuffer; bool& streamactive = streambufentry->streamactive; typedef std::ostream& (*endltype)(std::ostream&); if (streamactive) { if (endltype(f) == endltype(std::endl)) { streambuffer << "\n"; stream_flush(); if(streamloglevel == LOG_FATAL) { __print_back_trace(); GRAPHLAB_LOGGER_FAIL_METHOD("LOG_FATAL encountered"); } } } } return *this; } /** Sets the current logger level. All logging commands below the current logger level will not be written. */ void set_log_level(int new_log_level) { log_level = new_log_level; } /** * logs the message if loglevel>=OUTPUTLEVEL * This function should not be used directly. Use logger() * * @param loglevel Type of message \see LOG_DEBUG LOG_INFO LOG_WARNING LOG_ERROR LOG_FATAL * @param file File where the logger call originated * @param function Function where the logger call originated * @param line Line number where the logger call originated * @param fmt printf format string * @param arg var args. 
The parameters that match the format string */ void _log(int loglevel,const char* file,const char* function, int line,const char* fmt, va_list arg ); void _logbuf(int loglevel,const char* file,const char* function, int line, const char* buf, int len); void _lograw(int loglevel, const char* buf, int len); inline void stream_flush() { // get the stream buffer logger_impl::streambuff_tls_entry* streambufentry = reinterpret_cast<logger_impl::streambuff_tls_entry*>( pthread_getspecific(streambuffkey)); if (streambufentry != NULL) { std::stringstream& streambuffer = streambufentry->streambuffer; streambuffer.flush(); _lograw(streamloglevel, streambuffer.str().c_str(), (int)(streambuffer.str().length())); streambuffer.str(""); } } private: std::ofstream fout; std::string log_file; pthread_key_t streambuffkey; int streamloglevel; pthread_mutex_t mut; bool log_to_console; int log_level; }; file_logger& global_logger(); /** Wrapper to generate 0 code if the output level is lower than the log level */ template <bool dostuff> struct log_dispatch {}; template <> struct log_dispatch<true> { inline static void exec(int loglevel,const char* file,const char* function, int line,const char* fmt, ... ) { va_list argp; va_start(argp, fmt); global_logger()._log(loglevel, file, function, line, fmt, argp); va_end(argp); if(loglevel == LOG_FATAL) { __print_back_trace(); GRAPHLAB_LOGGER_FAIL_METHOD("LOG_FATAL encountered"); } } }; template <> struct log_dispatch<false> { inline static void exec(int loglevel,const char* file,const char* function, int line,const char* fmt, ... 
) {} }; struct null_stream { template<typename T> inline null_stream operator<<(T t) { return null_stream(); } inline null_stream operator<<(const char* a) { return null_stream(); } inline null_stream operator<<(std::ostream& (*f)(std::ostream&)) { return null_stream(); } }; template <bool dostuff> struct log_stream_dispatch {}; template <> struct log_stream_dispatch<true> { inline static file_logger& exec(int lineloglevel,const char* file,const char* function, int line, bool do_start = true) { return global_logger().start_stream(lineloglevel, file, function, line, do_start); } }; template <> struct log_stream_dispatch<false> { inline static null_stream exec(int lineloglevel,const char* file,const char* function, int line, bool do_start = true) { return null_stream(); } }; void textcolor(FILE* handle, int attr, int fg); void reset_color(FILE* handle); #endif ================================================ FILE: src/graphlab/logger/logger_includes.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <graphlab/logger/assertions.hpp> #include <graphlab/logger/logger.hpp> ================================================ FILE: src/graphlab/macros_def.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. 
* All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <boost/foreach.hpp> #include <stdint.h> // if GNUC is available, this checks if the file which included // macros_def.hpp is the same file which included macros_undef.hpp #ifdef __GNUC__ #define GRAPHLAB_MACROS_INC_LEVEL __INCLUDE_LEVEL__ #endif // prevent this file from being included before other graphlab headers #ifdef GRAPHLAB_MACROS #error "Repeated include of <macros_def.hpp>. This probably means that macros_def.hpp was not the last include, or some header file failed to include <macros_undef.hpp>" #endif #define GRAPHLAB_MACROS /** A macro to disallow the copy constructor and operator= functions This should be used in the private: declarations for a class */ #define DISALLOW_COPY_AND_ASSIGN(TypeName) \ TypeName(const TypeName&); \ void operator=(const TypeName&); // Shortcut macro definitions //! see http://www.boost.org/doc/html/foreach.html #define foreach BOOST_FOREACH #define rev_foreach BOOST_REVERSE_FOREACH ================================================ FILE: src/graphlab/macros_undef.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifdef __GNUC__ #if (GRAPHLAB_MACROS_INC_LEVEL != __INCLUDE_LEVEL__) #error "A <macros_def.hpp> was not paired with a <macros_undef.hpp>" #endif #undef GRAPHLAB_MACROS_INC_LEVEL #endif #undef GRAPHLAB_MACROS #undef DISALLOW_COPY_AND_ASSIGN #undef foreach #undef rev_foreach ================================================ FILE: src/graphlab/options/CMakeLists.txt ================================================ project(GraphLab) ================================================ FILE: src/graphlab/options/command_line_options.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <graphlab/options/command_line_options.hpp> #include <graphlab/scheduler/scheduler_list.hpp> namespace boost { template<> std::string lexical_cast< std::string>(const std::vector<int>& vec) { return graphlab_vec_to_string(vec); } template<> std::string lexical_cast<std::string>(const std::vector<uint32_t>& vec) { return graphlab_vec_to_string(vec); } template<> std::string lexical_cast<std::string>(const std::vector<uint64_t>& vec) { return graphlab_vec_to_string(vec); } template<> std::string lexical_cast< std::string >(const std::vector<double>& vec) { return graphlab_vec_to_string(vec); } template<> std::string lexical_cast< std::string>(const std::vector<float>& vec) { return graphlab_vec_to_string(vec); } template<> std::string lexical_cast< std::string>(const std::vector<std::string>& vec) { return graphlab_vec_to_string(vec); } }; namespace graphlab { static const char* engine_help_string = #include <graphlab/options/engine_help.txt> ; static const char* graph_help_string = #include <graphlab/options/graph_help.txt> ; bool command_line_options::parse(int argc, const char* const* argv, bool allow_unregistered) { namespace boost_po = boost::program_options; size_t ncpus(get_ncpus()); std::string engine_opts_string; std::string schedulertype(get_scheduler_type()); std::string scheduler_opts_string = ""; std::string graph_opts_string = ""; if(!suppress_graphlab_options) { // Set the program options desc.add_options() ("ncpus", boost_po::value<size_t>(&(ncpus))-> default_value(ncpus), "Number of cpus to use per machine. Defaults to (#cores - 2)") ("scheduler", boost_po::value<std::string>(&(schedulertype))-> default_value(schedulertype), (std::string("Supported schedulers are: " + get_scheduler_names_str() + ". 
Too see options for each scheduler, run the program with the option" " ---schedhelp=[scheduler_name]").c_str())) ("engine_opts", boost_po::value<std::string>(&(engine_opts_string))-> default_value(engine_opts_string), "string of engine options i.e., \"timeout=100\"") ("graph_opts", boost_po::value<std::string>(&(graph_opts_string))-> default_value(graph_opts_string), "String of graph options i.e., \"ingress=random\"") ("scheduler_opts", boost_po::value<std::string>(&(scheduler_opts_string))-> default_value(scheduler_opts_string), "String of scheduler options i.e., \"strict=true\"") ("engine_help", boost_po::value<std::string>()->implicit_value(""), "Display help for engine options.") ("graph_help", boost_po::value<std::string>()->implicit_value(""), "Display help for the distributed graph.") ("scheduler_help", boost_po::value<std::string>()->implicit_value(""), "Display help for schedulers."); } // Parse the arguments try { std::vector<std::string> arguments; std::copy(argv + 1, argv + argc + !argc, std::inserter(arguments, arguments.end())); boost_po::command_line_parser parser(arguments); parser.options(desc); if (allow_unregistered) parser.allow_unregistered(); if (num_positional) parser.positional(pos_opts); boost_po::parsed_options parsed = parser.run(); if (allow_unregistered) { unrecognized_options = boost_po::collect_unrecognized(parsed.options, boost_po::include_positional); } else { unrecognized_options.clear(); } boost_po::store(parsed, vm); boost_po::notify(vm); } catch( boost_po::error error) { std::cout << "Invalid syntax:\n" << "\t" << error.what() << "\n\n" << std::endl << "Description:" << std::endl; print_description(); return false; } if(vm.count("help")) { print_description(); return false; } if (vm.count("scheduler_help")) { std::string schedname = vm["scheduler_help"].as<std::string>(); if (schedname != "") { print_scheduler_info(schedname, std::cout); } else { std::vector<std::string> schednames = get_scheduler_names(); for(size_t i = 0;i < 
schednames.size(); ++i) { print_scheduler_info(schednames[i], std::cout); } } return false; } if (vm.count("engine_help")) { std::cout << engine_help_string; return false; } if (vm.count("graph_help")) { std::cout << graph_help_string; return false; } set_ncpus(ncpus); set_scheduler_type(schedulertype); get_scheduler_args().parse_string(scheduler_opts_string); get_engine_args().parse_string(engine_opts_string); get_graph_args().parse_string(graph_opts_string); return true; } // end of parse bool command_line_options::is_set(const std::string& option) { return vm.count(option); } void command_line_options::add_positional(const std::string& str) { num_positional++; pos_opts.add(str.c_str(), 1); } } ================================================ FILE: src/graphlab/options/command_line_options.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_COMMAND_LINE_OPTIONS #define GRAPHLAB_COMMAND_LINE_OPTIONS #include <string> #include <vector> #include <boost/program_options.hpp> #include <graphlab/options/graphlab_options.hpp> namespace boost { /** \ingroup util Converts a vector of any stream output-able type to a string */ template<typename T> std::string graphlab_vec_to_string(const std::vector<T>& vec) { std::stringstream strm; strm << "{" ; for(size_t i = 0; i < vec.size(); ++i) { strm << vec[i]; if(i < vec.size() - 1) strm << ", "; } strm << "}"; return strm.str(); } /** \ingroup util Provides lexical cast from vector<int> to string. Converts a vector of 1,2,3 to the string "{1, 2, 3}" */ template<> std::string lexical_cast< std::string>(const std::vector<int>& vec); /** \ingroup util Provides lexical cast from vector<int> to string. Converts a vector of 1,2,3 to the string "{1, 2, 3}" */ template<> std::string lexical_cast< std::string>(const std::vector<uint32_t>& vec); /** \ingroup util Provides lexical cast from vector<size_t> to string. Converts a vector of 1,2,3 to the string "{1, 2, 3}" */ template<> std::string lexical_cast<std::string>(const std::vector<uint64_t>& vec); /** \ingroup util Provides lexical cast from vector<double> to string. Converts a vector of 1.1,2.2,3.3 to the string "{1.1, 2.2, 3.3}" */ template<> std::string lexical_cast< std::string >(const std::vector<double>& vec); /** \ingroup util Provides lexical cast from vector<float> to string. Converts a vector of 1.1,2.2,3.3 to the string "{1.1, 2.2, 3.3}" */ template<> std::string lexical_cast< std::string>(const std::vector<float>& vec); /** \ingroup util Provides lexical cast from vector<string> to string. 
Converts a vector of "hello", "world" to the string "{hello, world}" */ template<> std::string lexical_cast< std::string>(const std::vector<std::string>& vec); }; // end of namespace boost namespace graphlab { /** * \ingroup util * * \brief The GraphLab command line options class helps parse basic * command line options for the GraphLab framework as well as user * applications. * * Early in the development of GraphLab we realized that a lot of * time was spent writing code to parse the many GraphLab options as * well as each of the applications options. In many cases we were * using the boost::program_options library which while very * powerful can also be fairly complicated. * * As a consequence, we developed a simple command line options * object that parses the standard argv options capturing GraphLab * specific options and also processing users options. GraphLab * command line tools to enable user applications to benefit from * sophisticated and still easy to use command line parsing. * * The command_line_options data-structure is built on top of the * boost::program_options library. We have tried to retain much of * the functionality of the boost::program_options library while * hiding some of the less "friendly" template meta-programming * "features". 
* * Here is an example of how the library is used: * * \code * int main(int argc, char** argv) { * * std::string filename; * size_t dimensions = 20; * double bound = 1E-5; * bool use_x = false; * std::vector<size_t> nsamples(1,10000); * * // Parse command line options * graphlab::command_line_options clopts("Welcome to a the HelloWorld"); * clopts.attach_option("file", filename, "The input filename (required)"); * clopts.add_positional("file"); * clopts.attach_option("dim", dimensions, * "the dimension of the grid"); * clopts.attach_option("bound", bound, * "The termination bound"); * clopts.attach_option("usex", use_x, * "Use algorithm x"); * clopts.attach_option("nsamples", nsamples, * "A vector of the number of samples"); * * if(!clopts.parse(argc, argv)) return EXIT_FAILURE; * * if(!clopts.is_set("file")) { * std::cout << "Input file not provided" << std::endl; * clopts.print_description(); * return EXIT_FAILURE; * } * } * \endcode * */ class command_line_options : public graphlab_options { boost::program_options::options_description desc; boost::program_options::positional_options_description pos_opts; size_t num_positional; boost::program_options::variables_map vm; bool suppress_graphlab_options; std::vector<std::string> unrecognized_options; public: /** * \brief Construct a command options object with basic settings. * * \param [in] desc_str The description of the program that is * printed when --help is invoked (in addition to all the options * and their descriptions). * * \param [in] suppress_graphlab_options If set to true the * standard GraphLab options are not parsed and the help screen. * only presents the users options. This is useful in cases where * command line options are needed outside of GraphLab binary * (e.g., simple utilities). 
*/ command_line_options(std::string desc_str, bool suppress_graphlab_options = false) : desc(desc_str), num_positional(0), suppress_graphlab_options(suppress_graphlab_options) { // Add documentation for help namespace boost_po = boost::program_options; desc.add_options()("help", "Print this help message."); } // End constructor /// Print the same message that is printed when the --help command /// line argument is provided. inline void print_description() const { std::cout << desc << std::endl; } /** * \brief This function should be called AFTER all the options * have been seen (including positionals). The parse function * reads the standard command line arguments and fills in the * attached variables. If there is an error in the syntax or * parsing fails the parse routine will print the error and return * false. * * If allow_unregistered is set to true, will permit unrecognized options */ bool parse(int argc, const char* const* argv, bool allow_unregistered = false); /** * \brief The is set function is used to test if the user provided * the option. The option string should match one of the attached * options. */ bool is_set(const std::string& option); /** * If allow_unregistered flag is set on parse * this will contain the list of unrecognized options */ inline std::vector<std::string> unrecognized() const { return unrecognized_options; } /** * \brief attach a user defined option to the command line options * parser. * * The attach option command is used to attach a user defined * option to the command line options parser. * * \param [in] option The name of the command line flag for that * option. * * \param [in,out] ret_var A reference to an "arbitrary" type * which can be any of the basic types (char, int, size_t, float, * double, bool, string...) or an std::vector of basic types. It * is important that the ret_cont point to a memory block that * will exist when parse is invoked. 
The default value is read * from the ret_cont * * \param [in] description Used to describe the option when --help is * called or when print_description is invoked. */ template<typename T> void attach_option(const std::string& option, T& ret_var, const std::string& description) { namespace boost_po = boost::program_options; desc.add_options() (option.c_str(), boost_po::value<T>(&ret_var)->default_value(ret_var), description.c_str()); } // end of attach_option // /** // \brief attach a user defined option to the command line options // parser. // The attach option command is used to attach a user defined option // to the command line options parser. // \param option The name of the command line flag for that option. // \param ret_cont A pointer to an "arbitrary" type which can be any // of the basic types (char, int, size_t, float, // double, bool, string...) or an std::vector of // basic types. It is important that the ret_cont // point to a memory block that will exist when parse // is invoked. // \param default_value The default value of the parameter if the // user does not provide this parameter on the // command line. // \param description Used to describe the option when --help // is called or when print_description is invoked. // */ // template<typename T> // void attach_option(const std::string& option, // T* ret_cont, // const T& default_value, // const std::string& description) { // namespace boost_po = boost::program_options; // assert(ret_cont != NULL); // desc.add_options() // (option.c_str(), // boost_po::value<T>(ret_cont)->default_value(default_value), // description.c_str()); // } /** * \brief This function adds the option as a positional argument. * A positional argument does not require --option and instead is * read based on its location. Each add_positional call adds to * the next position. 
*/ void add_positional(const std::string& str); }; // end class command line options }; // end namespace graphlab #endif ================================================ FILE: src/graphlab/options/engine_help.txt ================================================ "Synchronous Engine (sync)\n" "=========================\n" "The synchronous engine executes all active vertex program\n" "synchronously in a sequence of super-step (iterations) in both the\n" "shared and distributed memory settings.\n" "\n" "max_iterations: (default: infinity) The maximum number\n" "of iterations (super-steps) to run.\n" "\n" "timeout: (default: infinity) The maximum time in\n" "seconds that the engine may run. When the time runs out the\n" "current iteration is completed and then the engine terminates.\n" "\n" "use_cache: (default: false) This is used to enable\n" "caching. The update function must be written in a specific way\n" "to take advantage of this. See the documentation for details.\n" "\n" "snapshot_interval: (default: -1) If set to a positive value, a snapshot\n" "is taken every this number of iterations. If set to 0, a snapshot\n" "is taken before the first iteration. If set to a negative value,\n" "no snapshots are taken. A snapshot is a binary dump of the graph.\n" "\n" "snapshot_path: If snapshot_interval is set to a value >=0,\n" "this option must be specified and should contain a target basename \n" "for the snapshot. The path including folder and file prefix in \n" "which the snapshots should be saved.\n" "\n" "\n" "Asynchronous Engine (async)\n" "===========================\n" "The asynchronous consistent engine executed vertex programs\n" "asynchronously and can ensure mutual exclusion such that adjacent vertices\n" "do not run simultaneously\n" "timeout: (default: infinity) Maximum time in seconds the engine will\n" "run for. 
The actual runtime may be marginally greater as the engine\n" "waits for all threads and processes to flush all active tasks before\n" "returning.\n" "factorized: (default: true) Set to true to weaken the consistency\n" "model to factorized consistency where only individual gather/apply/scatter\n" "calls are guaranteed to be locally consistent. Can produce massive\n" "increases in throughput at a consistency penalty.\n" "nfibers: (default: 3000) Number of fibers to use\n" "stacksize: (default: 16384) Stacksize of each fiber.\n" "Warp Engine \n" "===========================\n" "The warp engine executes update functions under the warp system\n" "asynchronously and can ensure mutual exclusion such that adjacent vertices\n" "do not run simultaneously\n" "timeout: (default: infinity) Maximum time in seconds the engine will\n" "run for. The actual runtime may be marginally greater as the engine\n" "waits for all threads and processes to flush all active tasks before\n" "returning.\n" "factorized: (default: true) Set to true to weaken the consistency\n" "model to factorized consistency where only individual gather/apply/scatter\n" "calls are guaranteed to be locally consistent. Can produce massive\n" "increases in throughput at a consistency penalty.\n" "nfibers: (default: 3000) Number of fibers to use\n" "stacksize: (default: 16384) Stacksize of each fiber.\n" ================================================ FILE: src/graphlab/options/graph_help.txt ================================================ "Graph Options\n" "==============\n" "ingress: The graph partitioning method to use. May be \"random\",\n" "\"grid\", \"pds\", \"oblivious\" or \"hdrf\". The methods are in" "increasing complexity. 
\"random\" is the simplest and produces the \n" "worst partitions, while \"hdrf\" takes the longest, but produces\n" "a significantly better result.\n" "\n" "userecent: An optimization that can decrease memory utilization\n" "of oblivious significantly at a small\n" "partitioning penalty. Defaults to 0. Set to 1 to \n" "enable.\n" "\n" ================================================ FILE: src/graphlab/options/graphlab_options.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * Also contains code that is Copyright 2011 Yahoo! Inc. All rights * reserved. * * Contributed under the iCLA for: * Joseph Gonzalez (jegonzal@yahoo-inc.com) * */ #ifndef GRAPHLAB_GRAPHLAB_OPTIONS_HPP #define GRAPHLAB_GRAPHLAB_OPTIONS_HPP #ifndef __NO_OPENMP__ #include <omp.h> #endif #include <graphlab/options/options_map.hpp> #include <graphlab/parallel/pthread_tools.hpp> namespace graphlab { /** * The engine options class is really a simple struct that contains * the basic options needed to create an engine. These options * include: <ul> <li> size_t ncpus: The number of cpus (threads) to use for this engine. </li> <li> std::string engine_type: The type of engine to use. Currently we support {async, synchronous}. </li> <li> std::string scheduler_type: The type of scheduler to user. 
Currently we support a wide range of schedulers: {synchronous, fifo, priority, sampling, splash, sweep, multiqueue_fifo, multiqueue_priority, set, clustered_priority, round_robin, chromatic} </li> <li> size_t splash_size: The size parameter for the splash scheduler. </li> </ul> */ class graphlab_options { public: //! The number of cpus size_t ncpus; //! The type of scheduler to use std::string scheduler_type; //! additional arguments to the engine options_map engine_args; //! additional arguments to the scheduler options_map scheduler_args; //! Options for the graph options_map graph_args; graphlab_options() : ncpus(thread::cpu_count() > 2 ? (thread::cpu_count() - 2) : 2) { // Grab all the compiler flags /* \todo: Add these back at some point #ifdef COMPILEFLAGS #define QUOTEME_(x) #x #define QUOTEME(x) QUOTEME_(x) compile_flags = QUOTEME(COMPILEFLAGS); #undef QUOTEME #undef QUOTEME_ #endif */ } // end of constructor virtual ~graphlab_options() {} //! Set the number of cpus void set_ncpus(size_t n) { #ifndef __NO_OPENMP__ ncpus = n; omp_set_num_threads(ncpus); #else ncpus = n; #endif } //! Get the number of cpus size_t get_ncpus() const { return ncpus; } void set_scheduler_type(const std::string& stype) { //! \todo: ADD CHECKING scheduler_type = stype; } //! Get the type of scheduler const std::string& get_scheduler_type() const { return scheduler_type; } //! Get the engine arguments const options_map& get_engine_args() const { return engine_args; } //! 
Get the engine arguments options_map& get_engine_args() { return engine_args; } const options_map& get_graph_args() const { return graph_args; } options_map& get_graph_args() { return graph_args; } const options_map& get_scheduler_args() const { return scheduler_args; } options_map& get_scheduler_args() { return scheduler_args; } /** * Display the current engine options */ virtual void print() const { std::cout << "GraphLab Options -------------------\n" << "ncpus: " << ncpus << "\n" << "scheduler: " << scheduler_type << "\n"; std::cout << "\n"; std::cout << "Scheduler Options: \n"; std::cout << scheduler_args; std::cout << "Graph Options: \n"; std::cout << graph_args; std::cout << "Engine Options: \n"; std::cout << engine_args; std::cout << std::endl; } }; } #endif ================================================ FILE: src/graphlab/options/options_includes.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * Also contains code that is Copyright 2011 Yahoo! Inc. All rights * reserved. 
* * Contributed under the iCLA for: * Joseph Gonzalez (jegonzal@yahoo-inc.com) * */ #include <graphlab/options/command_line_options.hpp> #include <graphlab/options/options_map.hpp> #include <graphlab/options/graphlab_options.hpp> ================================================ FILE: src/graphlab/options/options_map.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * Also contains code that is Copyright 2011 Yahoo! Inc. All rights * reserved. * * Contributed under the iCLA for: * Joseph Gonzalez (jegonzal@yahoo-inc.com) * */ #include <string> #include <sstream> #include <iostream> #include <iomanip> #include <graphlab/options/options_map.hpp> namespace graphlab { void options_map::parse_string(std::string arguments) { std::pair<std::string, options_map> ret; // Break the string appart if(!arguments.empty()) { std::replace(arguments.begin(), arguments.end(), ',', ' '); std::replace(arguments.begin(), arguments.end(), ';', ' '); std::stringstream arg_strm(arguments); bool ret = parse_options(arg_strm); if (ret == false) { logstream(LOG_FATAL) << "Malformed option. 
Failed to parse \"" << arguments << "\"" << std::endl; } } } std::ostream& operator<<(std::ostream& out, const graphlab::options_map& opts) { // save the format flags std::ios_base::fmtflags fmt = out.flags(); std::map<std::string, graphlab::options_map::option_values>::const_iterator i = opts.options.begin(); while(i != opts.options.end()) { //out.setf(std::ios::left); out << std::setw(18) << std::left << i->first; out << std::setw(2) << "= "; //out.setf(std::ios::right); out << i->second.strval; out << std::endl; ++i; } // reset the format flags out.flags(fmt); out << std::endl; return out; } }; // end of namespace graphlab ================================================ FILE: src/graphlab/options/options_map.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * Also contains code that is Copyright 2011 Yahoo! Inc. All rights * reserved. * * Contributed under the iCLA for: * Joseph Gonzalez (jegonzal@yahoo-inc.com) * */ #ifndef GRAPHLAB_OPTIONS_MAP_HPP #define GRAPHLAB_OPTIONS_MAP_HPP #include <map> #include <sstream> #include <ostream> #include <istream> #include <boost/lexical_cast.hpp> #include <graphlab/logger/logger.hpp> #include <graphlab/util/stl_util.hpp> #include <graphlab/util/generics/robust_cast.hpp> namespace graphlab { /** options data structure. 
Defines a collection of key->value pairs where the key is a string, and the value is an arbitrary data type. The options_map class will invisibly cast between string, integer and double data types. */ class options_map { public: options_map() {}; explicit options_map(std::string &s) { parse_string(s); }; /** * Add an option -> value pair where value is a string. * Don't use. set_option() prefered. */ inline void set_option_str(const std::string &opt, const std::string &val) { options[opt].strval = val; try { options[opt].intval = boost::lexical_cast<int>(val); } catch(boost::bad_lexical_cast& error) {options[opt].intval = 0; } try { options[opt].dblval = boost::lexical_cast<double>(val); } catch(boost::bad_lexical_cast& error) { options[opt].dblval = 0.0; } if (val == "true" || val == "TRUE" || val == "yes" || val == "YES" || val == "1") options[opt].boolval = true; } template <typename T> void set_option(const std::string& opt, const T& val) { if (boost::is_convertible<T, std::string>::value) { set_option_str(opt, robust_cast<std::string>(val)); } else { options[opt].strval = robust_cast<std::string>(val); options[opt].intval = robust_cast<int>(val); options[opt].dblval = robust_cast<double>(val); options[opt].boolval = robust_cast<bool>(val); } } /** * Test if the option has been created */ inline bool is_set(const std::string& opt) const { return options.find(opt) != options.end(); } /** * Reads a string option */ inline bool get_option(const std::string& opt, std::string& val) const { std::map<std::string, option_values>::const_iterator i = options.find(opt); if (i == options.end()) return false; val = i->second.strval; return true; } /** * Reads a string option */ inline bool get_option(const std::string& opt, bool& val) const { std::map<std::string, option_values>::const_iterator i = options.find(opt); if (i == options.end()) return false; val = i->second.boolval; return true; } /** * Reads a integer option */ template <typename IntType> inline bool 
get_option(const std::string& opt, IntType& val) const { std::map<std::string, option_values>::const_iterator i = options.find(opt); if (i == options.end()) return false; val = i->second.intval; return true; } /** * Reads a float option */ inline bool get_option(const std::string& opt, float& val) const { std::map<std::string, option_values>::const_iterator i = options.find(opt); if (i == options.end()) return false; val = i->second.dblval; return true; } /** * Reads a double option */ inline bool get_option(const std::string& opt, double& val) const { std::map<std::string, option_values>::const_iterator i = options.find(opt); if (i == options.end()) return false; val = i->second.dblval; return true; } /** * Erases an option */ inline void erase_option(const std::string &opt) { options.erase(opt); } /** * Clears all options */ void clear_options() { options.clear(); } /** * Parses an option stream of the form "a=b c=d ..." */ inline bool parse_options(std::istream& s) { options.clear(); std::string opt, value; // read till the equal while(s.good()) { getline(s, opt, '='); if (s.bad() || s.eof()) return false; getline(s, value, ' '); if (s.bad()) return false; set_option_str(trim(opt), trim(value)); } return true; } /// The internal storage of the options struct option_values{ std::string strval; int intval; double dblval; bool boolval; option_values () : intval(0), dblval(0), boolval(false) { } }; std::vector<std::string> get_option_keys() const { std::map<std::string, option_values>::const_iterator iter = options.begin(); std::vector<std::string> ret; while (iter != options.end()) { ret.push_back(iter->first); ++iter; } return ret; } /** * Parse a comma delimited series of key1=value1,key2=value2 */ void parse_string(std::string arguments); std::map<std::string, option_values> options; }; std::ostream& operator<<(std::ostream& out, const graphlab::options_map& opts); } // end of graphlab namespace #endif ================================================ FILE: 
src/graphlab/parallel/CMakeLists.txt
================================================
project(GraphLab)


================================================
FILE: src/graphlab/parallel/atomic.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_ATOMIC_HPP
#define GRAPHLAB_ATOMIC_HPP

#include <stdint.h>
#include <boost/type_traits/is_integral.hpp>
#include <graphlab/serialization/serialization_includes.hpp>
#include <graphlab/parallel/atomic_ops.hpp>

namespace graphlab {
  namespace graphlab_impl {

    // Primary template; only the <T, true> (integral) and <T, false>
    // (non-integral) partial specializations below are usable.
    template<typename T, bool IsIntegral>
    class atomic_impl {};

    /**
     * \internal
     * \brief atomic object
     * A templated class for creating atomic numbers.
     *
     * Integral specialization: every operation maps directly onto a
     * GCC __sync_* builtin.
     */
    template<typename T>
    class atomic_impl <T, true>: public IS_POD_TYPE {
    public:
      //! The current value of the atomic number
      volatile T value;

      //! Creates an atomic number with value "value"
      atomic_impl(const T& value = T()) : value(value) { }

      //! Performs an atomic increment by 1, returning the new value
      T inc() { return __sync_add_and_fetch(&value, 1); }

      //! Performs an atomic decrement by 1, returning the new value
      T dec() { return __sync_sub_and_fetch(&value, 1); }

      //! Implicit cast to T (plain volatile read, no barrier)
      operator T() const { return value; }

      //! Performs an atomic increment by 1, returning the new value
      T operator++() { return inc(); }

      //! Performs an atomic decrement by 1, returning the new value
      T operator--() { return dec(); }

      //! Performs an atomic increment by 'val', returning the new value
      T inc(const T val) { return __sync_add_and_fetch(&value, val); }

      //! Performs an atomic decrement by 'val', returning the new value
      T dec(const T val) { return __sync_sub_and_fetch(&value, val); }

      //! Performs an atomic increment by 'val', returning the new value
      T operator+=(const T val) { return inc(val); }

      //! Performs an atomic decrement by 'val', returning the new value
      T operator-=(const T val) { return dec(val); }

      //! Performs an atomic increment by 1, returning the old value
      T inc_ret_last() { return __sync_fetch_and_add(&value, 1); }

      //! Performs an atomic decrement by 1, returning the old value
      T dec_ret_last() { return __sync_fetch_and_sub(&value, 1); }

      //! Performs an atomic increment by 1, returning the old value
      T operator++(int) { return inc_ret_last(); }

      //! Performs an atomic decrement by 1, returning the old value
      T operator--(int) { return dec_ret_last(); }

      //! Performs an atomic increment by 'val', returning the old value
      T inc_ret_last(const T val) { return __sync_fetch_and_add(&value, val); }

      //! Performs an atomic decrement by 'val', returning the old value
      //! (comment fixed: __sync_fetch_and_sub returns the value *before*
      //! the subtraction, not the new value)
      T dec_ret_last(const T val) { return __sync_fetch_and_sub(&value, val); }

      //! Performs an atomic exchange with 'val', returning the previous value
      T exchange(const T val) { return __sync_lock_test_and_set(&value, val); }
    };

    // specialization for floats and doubles: the __sync arithmetic
    // builtins only operate on integral types, so increment/decrement are
    // implemented as compare-and-swap retry loops instead.
    template<typename T>
    class atomic_impl <T, false>: public IS_POD_TYPE {
    public:
      //! The current value of the atomic number
      volatile T value;

      //! Creates an atomic number with value "value"
      atomic_impl(const T& value = T()) : value(value) { }

      //! Performs an atomic increment by 1, returning the new value
      T inc() { return inc(1); }

      //! Performs an atomic decrement by 1, returning the new value
      T dec() { return dec(1); }

      //! Implicit cast to T (plain volatile read, no barrier)
      operator T() const { return value; }

      //! Performs an atomic increment by 1, returning the new value
      T operator++() { return inc(); }

      //! Performs an atomic decrement by 1, returning the new value
      T operator--() { return dec(); }

      //! Performs an atomic increment by 'val', returning the new value
      T inc(const T val) {
        T prev_value;
        T new_value;
        do {
          prev_value = value;
          new_value = prev_value + val;
        } while(!atomic_compare_and_swap(value, prev_value, new_value));
        return new_value;
      }

      //! Performs an atomic decrement by 'val', returning the new value
      T dec(const T val) {
        T prev_value;
        T new_value;
        do {
          prev_value = value;
          new_value = prev_value - val;
        } while(!atomic_compare_and_swap(value, prev_value, new_value));
        return new_value;
      }

      //! Performs an atomic increment by 'val', returning the new value
      T operator+=(const T val) { return inc(val); }

      //! Performs an atomic decrement by 'val', returning the new value
      T operator-=(const T val) { return dec(val); }

      //! Performs an atomic increment by 1, returning the old value
      T inc_ret_last() { return inc_ret_last(1); }

      //! Performs an atomic decrement by 1, returning the old value
      T dec_ret_last() { return dec_ret_last(1); }

      //! Performs an atomic increment by 1, returning the old value
      T operator++(int) { return inc_ret_last(); }

      //! Performs an atomic decrement by 1, returning the old value
      T operator--(int) { return dec_ret_last(); }

      //! Performs an atomic increment by 'val', returning the old value
      T inc_ret_last(const T val) {
        T prev_value;
        T new_value;
        do {
          prev_value = value;
          new_value = prev_value + val;
        } while(!atomic_compare_and_swap(value, prev_value, new_value));
        return prev_value;
      }

      //! Performs an atomic decrement by 'val', returning the old value
      //! (comment fixed: this returns prev_value, i.e. the value *before*
      //! the decrement, not the new value)
      T dec_ret_last(const T val) {
        T prev_value;
        T new_value;
        do {
          prev_value = value;
          new_value = prev_value - val;
        } while(!atomic_compare_and_swap(value, prev_value, new_value));
        return prev_value;
      }

      //! Performs an atomic exchange with 'val', returning the previous value
      // NOTE(review): __sync_lock_test_and_set is documented for integral
      // and pointer operand types only; confirm the supported compilers
      // accept it for float/double here.
      T exchange(const T val) { return __sync_lock_test_and_set(&value, val); }
    };
  } // namespace graphlab_impl

  //! Public atomic<T>: dispatches to the builtin-based or CAS-loop
  //! implementation depending on whether T is integral.
  template <typename T>
  class atomic: public graphlab_impl::atomic_impl<T, boost::is_integral<T>::value> {
  public:
    //! Creates an atomic number with value "value"
    atomic(const T& value = T()):
      graphlab_impl::atomic_impl<T, boost::is_integral<T>::value>(value) { }
  };
} // namespace graphlab
#endif


================================================
FILE: src/graphlab/parallel/atomic_add_vector2_empty_specialization.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

/**
 * Also contains code that is Copyright 2011 Yahoo! Inc. All rights
 * reserved.
 *
 * Contributed under the iCLA for:
 *    Joseph Gonzalez (jegonzal@yahoo-inc.com)
 *
 */

#ifndef GRAPHLAB_ATOMIC_ADD_VECTOR2_EMPTY_SPECIALIZATION_HPP
#define GRAPHLAB_ATOMIC_ADD_VECTOR2_EMPTY_SPECIALIZATION_HPP

#include <vector>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/util/lock_free_pool.hpp>
#include <graphlab/util/empty.hpp>
#include <graphlab/util/dense_bitset.hpp>
#include <graphlab/parallel/atomic_add_vector2.hpp>

namespace graphlab {

  /**
   * Specialization of atomic_add_vector2 for the graphlab::empty value
   * type. Since an empty value carries no data, per-slot storage and
   * join logic are unnecessary; the whole structure degenerates to a
   * dense_bitset where bit i means "slot i holds a (empty) value".
   *
   * NOTE(review): the add()/test_and_get() return values below assume
   * dense_bitset::set_bit / clear_bit return the bit's *previous* state
   * -- confirm against dense_bitset.hpp.
   */
  template<>
  class atomic_add_vector2<graphlab::empty> {
  public:
    typedef graphlab::empty value_type;

  private:
    // one bit per slot: set == value present
    dense_bitset atomic_box_vec;

    /** Not assignable */
    void operator=(const atomic_add_vector2& other) { }

  public:
    /** Initialize the per vertex task set */
    atomic_add_vector2(size_t num_vertices = 0) {
      resize(num_vertices);
      atomic_box_vec.clear();
    }

    /**
     * Resize the internal locks for a different graph
     */
    void resize(size_t num_vertices) {
      atomic_box_vec.resize(num_vertices);
      atomic_box_vec.clear();
    }

    /** Add a task to the set returning false if the task was already
        present. */
    bool add(const size_t& idx,
             const value_type& val) {
      return !atomic_box_vec.set_bit(idx);
    } // end of add task to set

    // /** Add a task to the set returning false if the task was already
    //     present. */
    // bool add_unsafe(const size_t& idx,
    //                 const value_type& val) {
    //   ASSERT_LT(idx, atomic_box_vec.size());
    //   return atomic_box_vec[idx].set_unsafe(pool, val, joincounter);
    // } // end of add task to set

    // Same as add(idx, val); new_value is never written since the value
    // type is empty.
    bool add(const size_t& idx,
             const value_type& val,
             value_type& new_value) {
      return !atomic_box_vec.set_bit(idx);
    } // end of add task to set

    // Atomically removes the value at idx; ret_val is untouched (empty).
    bool test_and_get(const size_t& idx,
                      value_type& ret_val) {
      return atomic_box_vec.clear_bit(idx);
    }

    // Non-destructive check; ret_val is untouched (empty).
    bool peek(const size_t& idx,
              value_type& ret_val) {
      return atomic_box_vec.get(idx);
    }

    // True when slot idx holds no value.
    bool empty(const size_t& idx) const {
      return !atomic_box_vec.get(idx);
    }

    size_t size() const {
      return atomic_box_vec.size();
    }

    // empty values cannot join, so this is always 0
    size_t num_joins() const {
      return 0;
    }

    void clear() {
      atomic_box_vec.clear();
    }

    void clear(size_t i) {
      atomic_box_vec.clear_bit(i);
    }
  }; // end of vertex map
}; // end of namespace graphlab
#undef VALUE_PENDING
#endif


================================================
FILE: src/graphlab/parallel/atomic_ops.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_ATOMIC_OPS_HPP
#define GRAPHLAB_ATOMIC_OPS_HPP

#include <stdint.h>

namespace graphlab {

  /**
   * \ingroup util
   * atomic instruction that is equivalent to the following:
   * \code
   * if (a==oldval) { a = newval; return true; }
   * else { return false; }
   * \endcode
   */
  template<typename T>
  bool atomic_compare_and_swap(T& a,
                               T oldval,
                               T newval) {
    return __sync_bool_compare_and_swap(&a, oldval, newval);
  };

  /**
   * \ingroup util
   * atomic instruction that is equivalent to the following:
   * \code
   * if (a==oldval) { a = newval; return true; }
   * else { return false; }
   * \endcode
   */
  template<typename T>
  bool atomic_compare_and_swap(volatile T& a,
                               T oldval,
                               T newval) {
    return __sync_bool_compare_and_swap(&a, oldval, newval);
  };

  /**
   * \ingroup util
   * atomic instruction that is equivalent to the following:
   * \code
   * if (a==oldval) { a = newval; return true; }
   * else { return false; }
   * \endcode
   *
   * double specialization: the CAS is performed on the raw 64-bit pattern
   * of the double, since __sync_bool_compare_and_swap has no floating
   * point form. Consequences of the *bitwise* comparison: the swap fails
   * if 'a' holds a different NaN payload than oldval, and -0.0 does not
   * compare equal to +0.0 (unlike numeric ==). CAS retry loops (see
   * atomic.hpp) tolerate this because they reload 'a' each iteration.
   * NOTE(review): the reinterpret_cast type-punning is formally undefined
   * under strict aliasing; presumably relied-upon GCC/Clang behavior here
   * -- confirm against the project's supported compiler set.
   */
  template <>
  inline bool atomic_compare_and_swap(volatile double& a,
                                      double oldval,
                                      double newval) {
    volatile uint64_t* a_ptr = reinterpret_cast<volatile uint64_t*>(&a);
    const uint64_t* oldval_ptr = reinterpret_cast<const uint64_t*>(&oldval);
    const uint64_t* newval_ptr = reinterpret_cast<const uint64_t*>(&newval);
    return __sync_bool_compare_and_swap(a_ptr, *oldval_ptr, *newval_ptr);
  };

  /**
   * \ingroup util
   * atomic instruction that is equivalent to the following:
   * \code
   * if (a==oldval) { a = newval; return true; }
   * else { return false; }
   * \endcode
   *
   * float specialization: same 32-bit bit-pattern CAS and the same
   * bitwise-comparison caveats as the double specialization above.
   */
  template <>
  inline bool atomic_compare_and_swap(volatile float& a,
                                      float oldval,
                                      float newval) {
    volatile uint32_t* a_ptr = reinterpret_cast<volatile uint32_t*>(&a);
    const uint32_t* oldval_ptr = reinterpret_cast<const uint32_t*>(&oldval);
    const uint32_t* newval_ptr = reinterpret_cast<const uint32_t*>(&newval);
    return __sync_bool_compare_and_swap(a_ptr, *oldval_ptr, *newval_ptr);
  };

  /**
   * \ingroup util
   * \brief Atomically exchanges the values of a and b.
   * \warning This is not a full atomic exchange. Read of a,
   * and the write of b into a is atomic. But the write into b is not.
   */
  template<typename T>
  void atomic_exchange(T& a, T& b) {
    b = __sync_lock_test_and_set(&a, b);
  };

  /**
   * \ingroup util
   * \brief Atomically exchanges the values of a and b.
   * \warning This is not a full atomic exchange. Read of a,
   * and the write of b into a is atomic. But the write into b is not.
   */
  template<typename T>
  void atomic_exchange(volatile T& a, T& b) {
    b = __sync_lock_test_and_set(&a, b);
  };

  /**
   * \ingroup util
   * \brief Atomically sets a to the newval, returning the old value
   */
  template<typename T>
  T fetch_and_store(T& a, const T& newval) {
    return __sync_lock_test_and_set(&a, newval);
  };

}
#endif


================================================
FILE: src/graphlab/parallel/cache_line_pad.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

/**
 * Also contains code that is Copyright 2011 Yahoo! Inc. All rights
 * reserved.
 *
 * Contributed under the iCLA for:
 *    Joseph Gonzalez (jegonzal@yahoo-inc.com)
 *
 */

#ifndef GRAPHLAB_CACHE_LINE_PAD
#define GRAPHLAB_CACHE_LINE_PAD

namespace graphlab {

  /**
   * Used to prevent false cache sharing by padding T
   *
   * The pad rounds the struct size up so consecutive cache_line_pad<T>
   * elements in an array land on different 64-byte cache lines.
   * Note: when sizeof(T) is already a multiple of 64, the pad is a full
   * extra 64 bytes (64 - 0), not zero.
   * NOTE(review): no alignment attribute is applied, so a single
   * instance is not guaranteed to *start* on a cache-line boundary;
   * the padding only separates adjacent array elements -- confirm this
   * is the intended guarantee.
   */
  template <typename T>
  struct cache_line_pad {
    T value;                          // the wrapped value
    char pad[64 - (sizeof(T) % 64)];  // filler up to the next 64-byte multiple

    cache_line_pad(const T& value = T()) : value(value) { }

    // assign through to the wrapped value
    T& operator=(const T& other) { return value = other; }

    // implicit read of the wrapped value
    operator T() const { return value; }
  }; // end of cache_line_pad

}; // end of namespace
#endif


================================================
FILE: src/graphlab/parallel/deferred_rwlock.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef DEFERRED_RWLOCK_HPP
#define DEFERRED_RWLOCK_HPP

#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/parallel/queued_rwlock.hpp>
#include <graphlab/logger/assertions.hpp>

namespace graphlab {

// A reader-biased queued reader/writer lock. Lock requests that cannot be
// granted immediately are parked on an intrusive linked list of 'request'
// nodes instead of blocking the caller; the unlock paths hand back the
// request(s) that became runnable via the 'released' out-parameter.
class deferred_rwlock{
 public:

  struct request{
    char lockclass : 2;                     // QUEUED_RW_LOCK_REQUEST_READ / _WRITE
    __attribute__((may_alias)) uint64_t id : 62;
    request* next;                          // intrusive queue link
  };
 private:
  request* head;          // front of the pending-request queue (NULL if empty)
  request* tail;          // back of the pending-request queue
  uint16_t reader_count;  // number of read locks currently held
  bool writer;            // true while a write lock is held
  simple_spinlock lock;   // guards all of the above
 public:

  deferred_rwlock(): head(NULL),
                     tail(NULL), reader_count(0),writer(false) { }

  // debugging purposes only
  inline size_t get_reader_count() {
    __sync_synchronize();
    return reader_count;
  }

  // debugging purposes only
  inline bool has_waiters() {
    return head != NULL || tail != NULL;
  }

  // append request I to the back of the queue (caller holds 'lock')
  inline void insert_queue(request *I) {
    if (head == NULL) {
      head = I;
      tail = I;
    }
    else {
      tail->next = I;
      tail = I;
    }
  }
  // push request I to the front of the queue (caller holds 'lock')
  inline void insert_queue_head(request *I) {
    if (head == NULL) {
      head = I;
      tail = I;
    }
    else {
      I->next = head;
      head = I;
    }
  }

  // Try to acquire a write lock; on contention the request is queued at
  // the *front* so it is granted before older waiters. Returns true if
  // the lock was acquired immediately.
  inline bool writelock_priority(request *I) {
    I->next = NULL;
    I->lockclass = QUEUED_RW_LOCK_REQUEST_WRITE;
    lock.lock();
    if (reader_count == 0 && writer == false) {
      // fastpath
      writer = true;
      lock.unlock();
      return true;
    }
    else {
      insert_queue_head(I);
      lock.unlock();
      return false;
    }
  }

  // Try to acquire a write lock; on contention the request is queued at
  // the back. Returns true if the lock was acquired immediately.
  inline bool writelock(request *I) {
    I->next = NULL;
    I->lockclass = QUEUED_RW_LOCK_REQUEST_WRITE;
    lock.lock();
    if (reader_count == 0 && writer == false) {
      // fastpath
      writer = true;
      lock.unlock();
      return true;
    }
    else {
      insert_queue(I);
      lock.unlock();
      return false;
    }
  }

  // completes the write lock on the head. lock must be acquired
  // head must be a write lock
  inline void complete_wrlock() {
  //  ASSERT_EQ(reader_count.value, 0);
    head = head->next;
    if (head == NULL) tail = NULL;
    writer = true;
  }

  // completes the read lock on the head. lock must be acquired
  // head must be a read lock
  //
  // Grants the leading run of queued readers, then (reader-biased)
  // extracts any further readers interleaved behind queued writers and
  // grants them too. 'released' receives the head of the granted-reader
  // list; the return value is how many readers were granted.
  // NOTE(review): in the extraction loop below, 'cur = head->next' is
  // dereferenced before the 'cur == tail' check; this relies on the
  // queue invariants guaranteeing at least one node after 'head' when
  // head != NULL here -- confirm.
  inline size_t complete_rdlock(request* &released) {
    released = head;
    size_t numcompleted = 1;
    head = head->next;
    request* readertail = released;
    while (head != NULL && head->lockclass == QUEUED_RW_LOCK_REQUEST_READ) {
      readertail = head;
      head = head->next;
      numcompleted++;
    }
    reader_count += numcompleted;
    if (head == NULL) tail = NULL;

    // now released is the head to a reader list
    // and head is the head of a writer list
    // I want to go through the writer list and extract all the readers
    // this essentially
    // splits the list into two sections, one containing only readers, and
    // one containing only writers.
    // (reader biased locking)
    if (head != NULL) {
      request* latestwriter = head;
      request* cur = head->next;
      while (1) {
        if (cur->lockclass == QUEUED_RW_LOCK_REQUEST_WRITE) {
          latestwriter = cur;
        }
        else {
          readertail->next = cur;
          readertail = cur;
          reader_count++;
          numcompleted++;
          latestwriter->next = cur->next;
        }
        if (cur == tail) break;
        cur=cur->next;
      }
    }
    return numcompleted;
  }

  // Release a held write lock. If waiters become runnable, 'released'
  // receives them and the return value is how many were granted (readers
  // are granted in batches; a writer is granted alone).
  inline size_t wrunlock(request* &released) {
    released = NULL;
    lock.lock();
    writer = false;
    size_t ret = 0;
    if (head != NULL) {
      if (head->lockclass == QUEUED_RW_LOCK_REQUEST_READ) {
        ret = complete_rdlock(released);
        if (ret == 2) assert(released->next != NULL);
      }
      else {
        writer = true;
        released = head;
        complete_wrlock();
        ret = 1;
      }
    }
    lock.unlock();
    return ret;
  }

  // Try to acquire a read lock; queued at the back on contention.
  // Returns the number of requests granted (which includes I itself on
  // the fast path, via 'released').
  inline size_t readlock(request *I, request* &released)  {
    released = NULL;
    size_t ret = 0;
    I->next = NULL;
    I->lockclass = QUEUED_RW_LOCK_REQUEST_READ;
    lock.lock();
    // there are readers and no one is writing
    if (head == NULL && writer == false) {
      // fast path
      ++reader_count;
      lock.unlock();
      released = I;
      return 1;
    }
    else {
      // slow path. Insert into queue
      insert_queue(I);
      if (head->lockclass == QUEUED_RW_LOCK_REQUEST_READ && writer == false) {
        ret = complete_rdlock(released);
      }
      lock.unlock();
      return ret;
    }
  }

  // Same as readlock() but a contended request is queued at the *front*.
  inline size_t readlock_priority(request *I, request* &released)  {
    released = NULL;
    size_t ret = 0;
    I->next = NULL;
    I->lockclass = QUEUED_RW_LOCK_REQUEST_READ;
    lock.lock();
    // there are readers and no one is writing
    if (head == NULL && writer == false) {
      // fast path
      ++reader_count;
      lock.unlock();
      released = I;
      return 1;
    }
    else {
      // slow path. Insert into queue
      insert_queue_head(I);
      if (head->lockclass == QUEUED_RW_LOCK_REQUEST_READ && writer == false) {
        ret = complete_rdlock(released);
      }
      lock.unlock();
      return ret;
    }
  }

  // Release a held read lock. Only the last reader out hands off to
  // waiting requests (returned via 'released' / return count).
  inline size_t rdunlock(request* &released)  {
    released = NULL;
    lock.lock();
    --reader_count;
    if (reader_count == 0) {
      size_t ret = 0;
      if (head != NULL) {
        if (head->lockclass == QUEUED_RW_LOCK_REQUEST_READ) {
          ret = complete_rdlock(released);
        }
        else {
          writer = true;
          released = head;
          complete_wrlock();
          ret = 1;
        }
      }
      lock.unlock();
      return ret;
    }
    else {
      lock.unlock();
      return 0;
    }
  }
};

}
#endif


================================================
FILE: src/graphlab/parallel/fiber_barrier.hpp
================================================
#ifndef GRAPHLAB_PARALLEL_FIBER_BARRIER_HPP
#define GRAPHLAB_PARALLEL_FIBER_BARRIER_HPP
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/parallel/fiber_control.hpp>
namespace graphlab {

  // A sense-reversing barrier for fibers: waiters deschedule themselves
  // through fiber_control instead of blocking an OS thread.
  class fiber_barrier {
  private:
    graphlab::mutex mutex;
    graphlab::conditional conditional;
    mutable int needed;                        // number of fibers per barrier round
    mutable int called;                        // fibers arrived so far this round
    mutable bool barrier_sense;                // flips each round
    mutable bool barrier_release;              // matches the sense of the released round
    bool alive;
    mutable std::vector<size_t> fiber_handles; // handles of parked fibers

    // not copyconstructible
    fiber_barrier(const fiber_barrier&) { }

    // not copyable
    void operator=(const fiber_barrier& m) { }

  public:
    /// Construct a barrier which will only fall when numthreads enter
    fiber_barrier(size_t numthreads) {
      needed = numthreads;
      called = 0;
      barrier_sense = false;
      barrier_release = true;
      alive =
true;
      fiber_handles.resize(needed);
    }

    // Adjust the participant count; only safe while no fiber is waiting.
    void resize_unsafe(size_t numthreads) {
      needed = numthreads;
      fiber_handles.resize(needed);
    }

    /// Wait on the barrier until numthreads has called wait
    inline void wait() const {
      if (!alive) return;
      mutex.lock();
      // set waiting;
      fiber_handles[called] = fiber_control::get_tid();
      called++;
      bool listening_on = barrier_sense;
      if (called == needed) {
        // if I have reached the required limit, wait up. Set waiting
        // to 0 to make sure everyone wakes up
        std::vector<size_t> to_wake = fiber_handles;
        called = 0;
        barrier_release = barrier_sense;
        barrier_sense = !barrier_sense;
        // clear all waiting, wake everyone. (less 1 since current thread
        // is already awake)
        for (size_t i = 0;i < to_wake.size() - 1; ++i) {
          fiber_control::schedule_tid(to_wake[i]);
        }
      } else {
        // while no one has broadcasted, sleep
        while(barrier_release != listening_on && alive) {
          fiber_control::deschedule_self(&mutex.m_mut);
          mutex.lock();
        }
      }
      mutex.unlock();
    }
  }; // end of conditional

}
#endif


================================================
FILE: src/graphlab/parallel/fiber_conditional.hpp
================================================
#ifndef GRAPHLAB_FIBER_CONDITIONAL_HPP
#define GRAPHLAB_FIBER_CONDITIONAL_HPP
#include <vector>
#include <queue>
#include <graphlab/parallel/fiber_control.hpp>
namespace graphlab {

  /**
   * \ingroup util
   * Wrapper around pthread's condition variable, that can work with both
   * fibers and threads simultaneously, but at a cost of much greater memory
   * requirements.
   *
   * Limitations.
   *  - Does not support timed wait
   *  - threads and fibers are not queued perfectly. fibers are preferentially
   *  signaled.
   *
   * Before you use, see \ref parallel_object_intricacies.
   */
  class fiber_conditional {
  private:
    mutable pthread_cond_t  m_cond;          // for waiters that are real threads
    mutex lock;                              // guards the fiber queue below
    // used to hold the fibers that are waiting here
    mutable std::queue<size_t> fibers;

    // not copyable
    void operator=(const fiber_conditional& m) { }

  public:
    fiber_conditional() {
      int error = pthread_cond_init(&m_cond, NULL);
      ASSERT_TRUE(!error);
    }

    /** Copy constructor which does not copy. Do not use!
        Required for compatibility with some STL implementations (LLVM).
        which use the copy constructor for vector resize,
        rather than the standard constructor.    */
    fiber_conditional(const fiber_conditional &) {
      int error = pthread_cond_init(&m_cond, NULL);
      ASSERT_TRUE(!error);
    }

    /// Waits on condition. The mutex must already be acquired. Caller
    /// must be careful about spurious wakes.
    // A fiber (get_tid() > 0) is parked in the fiber queue and
    // descheduled; a plain thread falls through to pthread_cond_wait.
    inline void wait(const mutex& mut) const {
      size_t tid = fiber_control::get_tid();
      if (tid > 0) {
        lock.lock();
        fibers.push(tid);
        lock.unlock();
        fiber_control::deschedule_self(&mut.m_mut);
        mut.lock();
      } else {
        int error = pthread_cond_wait(&m_cond, &mut.m_mut);
        ASSERT_TRUE(!error);
      }
    }

    /// Signals one waiting thread to wake up
    // Fibers are woken preferentially; only if no fiber is queued does
    // this signal the pthread condition variable.
    inline void signal() const {
      if (!fibers.empty()) {
        lock.lock();
        // re-check under the lock: the unlocked test above is only a hint
        if (!fibers.empty()) {
          size_t tid = fibers.front();
          fibers.pop();
          lock.unlock();
          fiber_control::schedule_tid(tid);
          return;
        }
        lock.unlock();
      }
      int error = pthread_cond_signal(&m_cond);
      ASSERT_TRUE(!error);
    }

    /// Wakes up all waiting threads
    inline void broadcast() const {
      lock.lock();
      while (!fibers.empty()) {
        size_t tid = fibers.front();
        fibers.pop();
        fiber_control::schedule_tid(tid);
      }
      lock.unlock();
      int error = pthread_cond_broadcast(&m_cond);
      ASSERT_TRUE(!error);
    }

    ~fiber_conditional() {
      ASSERT_EQ(fibers.size(), 0);
      int error = pthread_cond_destroy(&m_cond);
      ASSERT_TRUE(!error);
    }
  };

}
#endif


================================================
FILE: src/graphlab/parallel/fiber_control.cpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <boost/bind.hpp> #include <graphlab/util/random.hpp> #include <graphlab/parallel/fiber_control.hpp> #include <graphlab/logger/assertions.hpp> #include <graphlab/rpc/dc.hpp> #include <graphlab/macros_def.hpp> //#include <valgrind/valgrind.h> namespace graphlab { bool fiber_control::tls_created = false; bool fiber_control::instance_created = false; size_t fiber_control::instance_construct_params_nworkers = 0; size_t fiber_control::instance_construct_params_affinity_base = 0; pthread_key_t fiber_control::tlskey; fiber_control::affinity_type fiber_control::all_affinity() { affinity_type ret; ret.fill(); return ret; } fiber_control::fiber_control(size_t nworkers, size_t affinity_base) :nworkers(nworkers), affinity_base(affinity_base), stop_workers(false), flsdeleter(NULL) { // initialize the thread local storage keys if (!tls_created) { pthread_key_create(&tlskey, fiber_control::tls_deleter); tls_created = true; } // set up the queues. 
schedule.resize(nworkers); for (size_t i = 0;i < nworkers; ++i) { schedule[i].waiting = false; schedule[i].nwaiting = 0; schedule[i].affinity_queue = new inplace_lf_queue2<fiber>; schedule[i].priority_queue = new inplace_lf_queue2<fiber>; schedule[i].popped_affinity_queue = NULL; schedule[i].popped_priority_queue = NULL; } // launch the workers for (size_t i = 0;i < nworkers; ++i) { workers.launch(boost::bind(&fiber_control::worker_init, this, i), affinity_base + i); } } fiber_control::~fiber_control() { join(); stop_workers = true; for (size_t i = 0;i < nworkers; ++i) { schedule[i].active_lock.lock(); schedule[i].active_cond.broadcast(); schedule[i].active_lock.unlock(); delete schedule[i].affinity_queue; delete schedule[i].priority_queue; } workers.join(); pthread_key_delete(tlskey); } void fiber_control::tls_deleter(void* f) { fiber_control::tls* t = (fiber_control::tls*)(f); delete t; } void fiber_control::create_tls_ptr() { pthread_setspecific(tlskey, (void*)(new fiber_control::tls)); } fiber_control::tls* fiber_control::get_tls_ptr() { if (tls_created == false) return NULL; else return (fiber_control::tls*) pthread_getspecific(tlskey); } fiber_control::fiber* fiber_control::get_active_fiber() { tls* t = get_tls_ptr(); if (t != NULL) return t->cur_fiber; else return NULL; } void fiber_control::active_queue_insert_tail(size_t workerid, fiber_control::fiber* value) { if (value->scheduleable) { // printf("%ld: Scheduling %ld on %ld\n", get_worker_id(), value->id, workerid); schedule[workerid].affinity_queue->enqueue(value); ++schedule[workerid].nwaiting; if (schedule[workerid].waiting) { schedule[workerid].active_lock.lock(); schedule[workerid].active_cond.signal(); schedule[workerid].active_lock.unlock(); } } } void fiber_control::active_queue_insert_head(size_t workerid, fiber_control::fiber* value) { if (value->scheduleable) { // printf("%ld: Scheduling %ld on %ld\n", get_worker_id(), value->id, workerid); schedule[workerid].priority_queue->enqueue(value); 
++schedule[workerid].nwaiting; if (schedule[workerid].waiting) { schedule[workerid].active_lock.lock(); schedule[workerid].active_cond.signal(); schedule[workerid].active_lock.unlock(); } } } fiber_control::fiber* fiber_control::try_pop_queue(inplace_lf_queue2<fiber>& lfqueue, fiber*& popped_queue) { fiber_control::fiber* ret = NULL; // if there is stuff in the popped queue, pop it. if (popped_queue == NULL) { popped_queue = lfqueue.dequeue_all(); } if (popped_queue != NULL) { ret = popped_queue; do { popped_queue = ret->next; asm volatile("pause\n": : :"memory"); } while(popped_queue == NULL); // we have reached the end of the queue. clear the popped queue // and return if (popped_queue == lfqueue.end_of_dequeue_list()) { popped_queue = NULL; } } return ret; } fiber_control::fiber* fiber_control::active_queue_remove(size_t workerid) { fiber_control::fiber* ret = NULL; thread_schedule& curts = schedule[workerid]; ret = try_pop_queue(*curts.priority_queue, curts.popped_priority_queue); if (ret == NULL) { ret = try_pop_queue(*curts.affinity_queue , curts.popped_affinity_queue); } if (ret) { // printf("%ld: Running %ld\n", get_worker_id(), ret->id); } return ret; } void fiber_control::exit() { distributed_control* dc = distributed_control::get_instance(); if (dc) dc->flush(); fiber* fib = get_active_fiber(); if (fib != NULL) { // add to garbage. fib->terminate = true; yield(); // never returns ASSERT_MSG(false, "Impossible Condition. Dead Fiber woke up"); } else { ASSERT_MSG(false, "Calling fiber exit not from a fiber"); } } static timer flush_timer; mutex flush_lock; void fiber_control::worker_init(size_t workerid) { /* * This is the "root" stack for each worker. * When there are active user threads associated with this worker, * it will switch directly between the fibers. 
* But, when the worker has no other fiber to run, it will return to this * stack and and wait in a condition variable */ // create a root context create_tls_ptr(); // set up the tls structure tls* t = get_tls_ptr(); t->prev_fiber = NULL; t->cur_fiber = NULL; t->garbage = NULL; t->workerid = workerid; t->parent = this; schedule[workerid].waiting = true; schedule[workerid].active_lock.lock(); while(!stop_workers) { // get a fiber to run fiber* next_fib = t->parent->active_queue_remove(workerid); if (next_fib != NULL) { // if there is a fiber. yield to it schedule[workerid].active_lock.unlock(); schedule[workerid].waiting = false; active_workers.inc(); yield_to(next_fib); if (flush_timer.current_time() > 0.0001 && flush_lock.try_lock()) { distributed_control* dc = distributed_control::get_instance(); if (dc) dc->flush_soon(); flush_timer.start(); flush_lock.unlock(); } active_workers.dec(); schedule[workerid].waiting = true; schedule[workerid].active_lock.lock(); } else { // if there is no fiber. wait. schedule[workerid].active_cond.wait(schedule[workerid].active_lock); } } schedule[workerid].active_lock.unlock(); } struct trampoline_args { boost::function<void(void)> fn; }; // the trampoline to call the user function. This function never returns void fiber_control::trampoline(intptr_t _args) { // we may have launched to here by switching in from another fiber. // we will need to clean up the previous fiber tls* t = get_tls_ptr(); if (t->prev_fiber) t->parent->reschedule_fiber(t->workerid, t->prev_fiber); t->prev_fiber = NULL; trampoline_args* args = reinterpret_cast<trampoline_args*>(_args); try { args->fn(); } catch (...) 
{ } delete args; fiber_control::exit(); } size_t fiber_control::launch(boost::function<void(void)> fn, size_t stacksize, affinity_type affinity) { ASSERT_GT(affinity.popcount(), 0); size_t b = 0; ASSERT_TRUE(affinity.first_bit(b)); // make sure there is always a worker I can work on ASSERT_LT(b, nworkers); // allocate a stack fiber* fib = new fiber; fib->parent = this; fib->stack = malloc(stacksize); fib->id = fiber_id_counter.inc(); foreach(size_t b, affinity) { if (b < nworkers) fib->affinity_array.push_back((unsigned char)b); else break; } ASSERT_GT(fib->affinity_array.size(), 0); fib->affinity = affinity; //VALGRIND_STACK_REGISTER(fib->stack, (char*)fib->stack + stacksize); fib->fls = NULL; fib->next = NULL; fib->deschedule_lock = NULL; fib->terminate = false; fib->descheduled = false; fib->scheduleable = true; // construct the initial context trampoline_args* args = new trampoline_args; args->fn = fn; fib->initial_trampoline_args = (intptr_t)(args); // stack grows downwards. fib->context = boost::context::make_fcontext((char*)fib->stack + stacksize, stacksize, trampoline); fibers_active.inc(); // find a place to put the thread size_t choice = pick_fiber_worker(fib); active_queue_insert_tail(choice, fib); return reinterpret_cast<size_t>(fib); } size_t fiber_control::pick_fiber_worker(fiber* fib) { // first try to use the original worker if possible size_t choice = get_worker_id(); if (choice == (size_t)(-1) || fib->affinity.get(choice) == 0) { //choice rejected, pick randomly from the available choices // if there is only one affinity option, return it if (fib->affinity_array.size() == 1) { choice = fib->affinity_array[0]; } else { size_t ra = graphlab::random::fast_uniform<size_t>(0,fib->affinity_array.size() - 1); std::swap(fib->affinity_array[ra], fib->affinity_array[0]); choice = fib->affinity_array[0]; } } return choice; } void fiber_control::yield_to(fiber* next_fib) { // the core scheduling logic tls* t = get_tls_ptr(); // if (next_fib) { // if 
// (t->cur_fiber) {
  //   printf("%ld: yield to: %ld from %ld\n", get_worker_id(), next_fib->id, t->cur_fiber->id);
  // } else {
  //   printf("%ld: yield to: %ld\n", get_worker_id(), next_fib->id);
  // }
  // }
  if (next_fib != NULL) {
    // reset the priority flag
    next_fib->priority = false;
    // current fiber moves to previous
    // next fiber move to current
    t->prev_fiber = t->cur_fiber;
    t->cur_fiber = next_fib;
    if (t->prev_fiber != NULL) {
      // context switch to fib outside the lock
      boost::context::jump_fcontext(t->prev_fiber->context,
                                    t->cur_fiber->context,
                                    t->cur_fiber->initial_trampoline_args);
    } else {
      // no fiber was running: we are on the worker's base context
      boost::context::jump_fcontext(&t->base_context,
                                    t->cur_fiber->context,
                                    t->cur_fiber->initial_trampoline_args);
    }
  } else {
    // ok. there isn't anything to schedule to
    // am I meant to be terminated? or descheduled?
    if (t->cur_fiber &&
        (t->cur_fiber->terminate || t->cur_fiber->descheduled) ) {
      // yup. killing current fiber
      // context switch back to basecontext which will
      // do the cleanup
      //
      // current fiber moves to previous
      // next fiber (base context) move to current
      // (as identified by cur_fiber = NULL)
      t->prev_fiber = t->cur_fiber;
      t->cur_fiber = NULL;
      boost::context::jump_fcontext(t->prev_fiber->context, &t->base_context, 0);
    } else {
      // nothing to do, and not terminating...
      // then don't yield!
      return;
    }
  }
  // reread the tls pointer because we may have woken up in a different thread
  t = get_tls_ptr();
  // reschedule the previous fiber
  if (t->prev_fiber) reschedule_fiber(t->workerid, t->prev_fiber);
  t->prev_fiber = NULL;
  // if distributed_controller alive, opportunistically pump incoming RPC calls
  distributed_control* dc = distributed_control::get_instance();
  if (dc && t->workerid < dc->num_handler_threads()) {
    dc->handle_incoming_calls(t->workerid, dc->num_handler_threads());
  }
}

// Puts fib back into a runnable state on worker workerid, or completes a
// pending deschedule/terminate request that was flagged on it.
void fiber_control::reschedule_fiber(size_t workerid, fiber* fib) {
  fib->lock.lock();
  if (!fib->terminate && !fib->descheduled) {
    fib->lock.unlock();
    // we reschedule it
    // Re-lock the queue
    //printf("%ld: Reinserting %ld\n", get_worker_id(), fib->id);
    if (!fib->priority) active_queue_insert_tail(workerid, fib);
    else active_queue_insert_head(workerid, fib);
  } else if (fib->descheduled) {
    // unflag descheduled and unset scheduleable
    fib->descheduled = false;
    fib->scheduleable = false;
    // atomically release the mutex paired with deschedule_self()
    if (fib->deschedule_lock) pthread_mutex_unlock(fib->deschedule_lock);
    fib->deschedule_lock = NULL;
    //printf("%ld: Descheduling complete %ld\n", get_worker_id(), fib->id);
    fib->lock.unlock();
  } else if (fib->terminate) {
    fib->lock.unlock();
    // previous fiber is dead. destroy it
    free(fib->stack);
    //VALGRIND_STACK_DEREGISTER(fib->stack);
    // delete the fiber local storage if any
    if (fib->fls && flsdeleter) flsdeleter(fib->fls);
    delete fib;
    // if we are out of threads, signal the join
    if (fibers_active.dec() == 0) {
      join_lock.lock();
      join_cond.signal();
      join_lock.unlock();
    }
  } else {
    // impossible condition
    assert(false);
  }
}

void fiber_control::yield() {
  // the core scheduling logic
  tls* t = get_tls_ptr();
  if (t == NULL) return;
  // remove some other work to do.
fiber_control* parentgroup = t->parent;
  size_t workerid = t->workerid;
  fiber* next_fib = parentgroup->active_queue_remove(workerid);
  t->parent->yield_to(next_fib);
}

// Currently identical to yield(); kept as a separate entry point.
void fiber_control::fast_yield() {
  yield();
}

// Blocks the calling (real) thread until every launched fiber has terminated.
void fiber_control::join() {
  join_lock.lock();
  while(fibers_active.value > 0) {
    join_cond.wait(join_lock);
  }
  join_lock.unlock();
}

// Returns the current fiber handle (a pointer to the fiber struct),
// or 0 when called from outside a fiber worker.
size_t fiber_control::get_tid() {
  fiber_control::tls* tls = get_tls_ptr();
  if (tls != NULL) return reinterpret_cast<size_t>(tls->cur_fiber);
  else return (size_t)(0);
}

bool fiber_control::in_fiber() {
  return get_tls_ptr() != NULL;
}

// Flags the current fiber for descheduling and yields; the paired mutex
// is released later by reschedule_fiber() during the context switch, so
// the deschedule-and-unlock pair is atomic with respect to schedule_tid().
void fiber_control::deschedule_self(pthread_mutex_t* lock) {
  fiber* fib = get_tls_ptr()->cur_fiber;
  fib->lock.lock();
  assert(fib->descheduled == false);
  assert(fib->scheduleable == true);
  fib->deschedule_lock = lock;
  fib->descheduled = true;
  //printf("%ld: Descheduling requested %ld\n", get_worker_id(), fib->id);
  fib->lock.unlock();
  yield();
}

bool fiber_control::worker_has_priority_fibers_on_queue() {
  tls* t = get_tls_ptr();
  if (t == NULL) return false;
  fiber_control* parentgroup = t->parent;
  size_t workerid = t->workerid;
  return !parentgroup->schedule[workerid].priority_queue->empty();
}

bool fiber_control::worker_has_fibers_on_queue() {
  tls* t = get_tls_ptr();
  if (t == NULL) return false;
  fiber_control* parentgroup = t->parent;
  size_t workerid = t->workerid;
  return !parentgroup->schedule[workerid].priority_queue->empty() ||
         !parentgroup->schedule[workerid].affinity_queue->empty();
}

// Returns the worker id owning the current fiber, or (size_t)(-1)
// when called from outside a fiber worker.
size_t fiber_control::get_worker_id() {
  fiber_control::tls* tls = get_tls_ptr();
  if (tls != NULL) return tls->workerid;
  else return (size_t)(-1);
}

// Wakes a fiber previously descheduled via deschedule_self().
void fiber_control::schedule_tid(size_t tid, bool priority) {
  fiber* fib = reinterpret_cast<fiber*>(tid);
  fib->lock.lock();
  // we MUST get here only after the thread was completely descheduled
  // or no deschedule operation has happened yet.
assert(fib->descheduled == false); fib->descheduled = false; if (fib->scheduleable == false) { // if this thread was descheduled completely. Reschedule it. //printf("%ld: Scheduling requested %ld\n", get_worker_id(), fib->id); fib->scheduleable = true; fib->priority = priority; fib->lock.unlock(); size_t choice = fib->parent->pick_fiber_worker(fib); fib->parent->reschedule_fiber(choice, fib); } else { //printf("%ld: Scheduling requested of running thread %ld\n", get_worker_id(), fib->id); fib->lock.unlock(); } } void fiber_control::set_tls_deleter(void (*deleter)(void*)) { flsdeleter = deleter; } void* fiber_control::get_tls() { fiber_control::tls* f = get_tls_ptr(); if (f != NULL) { return f->cur_fiber->fls; } else { // cannot get TLS of a non-fiber ASSERT_MSG(false, "Trying to get a fiber TLS from a non-fiber"); return NULL; } } void fiber_control::set_tls(void* tls) { fiber_control::tls* f = get_tls_ptr(); if (f != NULL) { f->cur_fiber->fls = tls; } else { // cannot get TLS of a non-fiber ASSERT_MSG(false, "Trying to get a fiber TLS from a non-fiber"); } } void fiber_control::instance_set_parameters(size_t nworkers = 0, size_t affinity_base = 0) { instance_construct_params_nworkers = nworkers; instance_construct_params_affinity_base = affinity_base; } fiber_control& fiber_control::get_instance() { fiber_control::instance_created = true; // set sane defaults if (instance_construct_params_nworkers == 0) { instance_construct_params_nworkers = thread::cpu_count(); } static fiber_control singleton(instance_construct_params_nworkers, instance_construct_params_affinity_base); return singleton; } } ================================================ FILE: src/graphlab/parallel/fiber_control.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_FIBER_CONTROL_HPP
#define GRAPHLAB_FIBER_CONTROL_HPP
#include <stdint.h>
#include <cstdlib>
#include <boost/context/all.hpp>
#include <boost/function.hpp>
#include <boost/lockfree/queue.hpp>
#include <graphlab/util/dense_bitset.hpp>
#include <graphlab/util/inplace_lf_queue2.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/parallel/atomic.hpp>
namespace graphlab {

/**
 * The master controller for the user mode threading system
 */
class fiber_control {
 public:
  // a fiber may be restricted to a subset of workers; one bit per worker
  typedef fixed_dense_bitset<64> affinity_type;
  // returns an affinity with every worker bit set
  static affinity_type all_affinity();

  // bookkeeping for a single user-mode thread
  struct fiber {
    simple_spinlock lock;
    fiber_control* parent;
    boost::context::fcontext_t* context;
    void* stack;
    size_t id;
    affinity_type affinity;
    std::vector<unsigned char> affinity_array; // cached list of usable worker ids
    void* fls; // fiber local storage
    fiber* next; // intrusive queue link
    intptr_t initial_trampoline_args;
    pthread_mutex_t* deschedule_lock; // if descheduled is set, we will
                                      // atomically deschedule and unlock
                                      // this mutex
    bool descheduled;  // flag. set if this fiber is to be descheduled.
                       // This is a temporary flag, and is only used to notify
                       // the context switch to deschedule this thread.
                       // lock must be acquired for this to be modified
    bool terminate;    // flag. set if this fiber is to be destroyed.
                       // This is a temporary flag, and is only used to notify
                       // the context switch to destroy this thread.
    bool scheduleable; // Managed by the queue management routines.
                       // Set if the fiber is inside the scheduling queue
                       // or is running in a thread.
                       // lock must be acquired for this to be modified.
    bool priority;     // flag. If set, rescheduling this fiber
                       // will cause it to be placed at the head of the queue
  };

 private:
  size_t nworkers;
  size_t affinity_base;
  atomic<size_t> fiber_id_counter;
  atomic<size_t> fibers_active;
  atomic<size_t> active_workers;
  mutex join_lock;
  conditional join_cond;
  bool stop_workers;

  // The scheduler is a simple queue. One for each worker
  struct thread_schedule {
    thread_schedule():waiting(false) { }
    mutex active_lock;
    conditional active_cond;
    volatile bool waiting;
    size_t nwaiting;
    // a queue of fibers to evaluate before those in the thread_queue
    inplace_lf_queue2<fiber>* affinity_queue;
    fiber* popped_affinity_queue;
    inplace_lf_queue2<fiber>* priority_queue;
    fiber* popped_priority_queue;
  };
  std::vector<thread_schedule> schedule;
  thread_group workers;

  // locks must be acquired outside the call
  void active_queue_insert_head(size_t workerid, fiber* value);
  void active_queue_insert_tail(size_t workerid, fiber* value);
  void active_queue_insert_tail(fiber* value);
  fiber* active_queue_remove(size_t workerid);

  // a thread local storage for the worker to point to a fiber
  static bool tls_created;
  struct tls {
    fiber_control* parent;
    fiber* prev_fiber; // the fiber we context switch from
    fiber* cur_fiber;  // the fiber we are context switching to
    fiber* garbage;    // A fiber to delete after the context switch
    size_t workerid;
    boost::context::fcontext_t base_context;
  };
  static pthread_key_t tlskey; // points to the tls structure above
  static void tls_deleter(void* tls);

  /// internal function to create the TLS for the worker threads
  static void create_tls_ptr();
  /// internal function to read the TLS for the worker threads
  static tls* get_tls_ptr();
  /// Returns the current fiber scheduled on this worker thread
  static fiber* get_active_fiber();

  /// Gets a fiber from the lock-free / popped pair
  fiber* try_pop_queue(inplace_lf_queue2<fiber>& lfqueue,
                       fiber*& popped_queue);

  /// The function that each worker thread starts off running
  void worker_init(size_t workerid);

  void reschedule_fiber(size_t workerid, fiber* pfib);
  void yield_to(fiber* next_fib);
  static void trampoline(intptr_t _args);

  // deleter applied to non-NULL fiber local storage on fiber termination
  void (*flsdeleter)(void*);

  size_t pick_fiber_worker(fiber* fib);

  // delete copy constructor
  fiber_control(fiber_control&) {};

 public:
  /// Constructor; normally accessed through get_instance()
  fiber_control(size_t nworkers, size_t affinity_base);

  ~fiber_control();

  /** the basic launch function
   * Returns a fiber ID. IDs are not sequential.
   * \note The ID is really a pointer to a fiber_control::fiber object.
   */
  size_t launch(boost::function<void (void)> fn,
                size_t stacksize = 8192,
                affinity_type worker_affinity = all_affinity());

  /**
   * Waits for all functions to join
   */
  void join();

  /**
   * Returns the number of workers
   */
  size_t num_workers() {
    return nworkers;
  }

  /**
   * Returns the number of threads that have yet to join
   */
  inline size_t num_threads() {
    return fibers_active.value;
  }

  /**
   * Returns the total number threads ever created
   */
  inline size_t total_threads_created() {
    return fiber_id_counter.value;
  }

  /**
   * Sets the TLS deletion function. The deletion function will be called
   * on every non-NULL TLS value.
   */
  void set_tls_deleter(void (*deleter)(void*));

  /**
   * Gets the TLS value. Defaults to NULL.
   * Note that this function will only work within a fiber.
   */
  static void* get_tls();

  /**
   * Sets the TLS value.
   * Note that this function will only work within a fiber.
   * If the value is not NULL, and the deletion function is set by
   * set_tls_deleter(), the deleter will be called on the value on
   * fiber termination.
   */
  static void set_tls(void* value);

  /**
   * Kills the current fiber.
   * Note that this function will only work within a fiber.
   * Implodes dramatically if called from outside a fiber.
   */
  static void exit();

  /**
   * Yields to another fiber.
   * Note that this function will only work within a fiber.
   * If called from outside a fiber, returns immediately.
   */
  static void yield();

  /**
   * Yields to another fiber of the same affinity.
   * Note that this function will only work within a fiber.
   * If called from outside a fiber, returns immediately.
   */
  static void fast_yield();

  /**
   * Returns true if the current worker has other fiber waiting on its queue
   */
  static bool worker_has_fibers_on_queue();

  /**
   * Returns true if the current worker has other priority fibers waiting on
   * its queue
   */
  static bool worker_has_priority_fibers_on_queue();

  /// True if the singleton instance was created
  static bool instance_created;
  static size_t instance_construct_params_nworkers;
  static size_t instance_construct_params_affinity_base;

  /**
   * Sets the fiber control construction parameters.
   * Fails with an assertion failure if the instance has already been created.
   * Must be called prior to any other calls to get_instance()
   * \param nworkers Number of worker threads to spawn. If set to 0,
   *                 the number of workers will be automatically determined
   *                 based on the number of cores the system has.
   * \param affinity_base First worker will have CPU affinity equal to
   *                      affinity_base. Second will be affinity_base + 1, etc.
   *                      Defaults to 0.
   */
  static void instance_set_parameters(size_t nworkers,
                                      size_t affinity_base);

  /**
   * Gets a reference to the main fiber control singleton
   */
  static fiber_control& get_instance();

  /**
   * Returns the current fiber handle.
   * Note that fiber handles are not sequential, and are really a
   * pointer to an internal datastructure.
   * If called from within a fiber, returns a non-zero value.
   * If called from outside a fiber, returns 0.
   */
  static size_t get_tid();

  /**
   * Returns true if the calling thread is in a fiber, false otherwise.
   */
  static bool in_fiber();

  /**
   * Returns the worker managing the current fiber.
   * Worker IDs are sequential.
   * If called from outside a fiber, returns (size_t)(-1)
   */
  static size_t get_worker_id();

  /**
   * Atomically deschedules the current thread and unlocks the mutex.
   *
   * deschedule_self() and schedule_tid() must be managed carefully
   * to avoid race conditions. i.e. schedule_tid() happening before
   * deschedule_self().
   *
   * To support this correctly, the descheduling must be paired together
   * with a mutex.
   *
   * For instance, to use this to implement a promise.
   * \code
   * // descheduling fiber
   * pthread_mutex_lock(&lock);
   * if ( ... promise not ready ...) {
   *   deschedule_self(&lock);
   * } else {
   *   pthread_mutex_unlock(&lock);
   * }
   * ... use the promise ...
   * \endcode
   *
   *
   * The promise execution thread then must do the following
   * \code
   * ... tid contains the fiber ID to wake when promise is done
   * ... compute promise...
   * pthread_mutex_lock(&lock); // same lock as above
   * ... set promise completion...
   * schedule_tid(tid); // wake up the fiber
   * pthread_mutex_unlock(&lock);
   * \endcode
   */
  static void deschedule_self(pthread_mutex_t* lock);

  /**
   * Schedules a fiber for execution.
   * If this fiber was previously descheduled by
   * deschedule_self(), the fiber is scheduled for execution.
   * Otherwise, nothing happens. Some care must be taken to avoid race
   * conditions. See the deschedule_self() function for details.
   * This thread by default will be stuck at the head of queue
   * and will wake up quickly.
   *
   * \param priority If true, thread will be placed at the head
   * of the scheduler. If false, it will be placed at the tail
   * of the scheduler
   */
  static void schedule_tid(size_t tid, bool priority = true);
};

}
#endif

================================================
FILE: src/graphlab/parallel/fiber_group.cpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#include <boost/bind.hpp>
#include <graphlab/parallel/fiber_group.hpp>
#include <graphlab/logger/assertions.hpp>
namespace graphlab {

// Wrapper run inside the fiber: invokes the user function, then
// decrements the group's running counter (possibly signalling join()).
void fiber_group::invoke(const boost::function<void (void)>& spawn_function,
                         fiber_group* group) {
  spawn_function();
  group->decrement_running_counter();
}

// Launches a fiber using the group's default affinity.
void fiber_group::launch(const boost::function<void (void)> &spawn_function) {
  launch(spawn_function, affinity);
}

// Launches a fiber restricted to the given worker affinity set.
void fiber_group::launch(const boost::function<void (void)> &spawn_function,
                         affinity_type worker_affinity) {
  increment_running_counter();
  fiber_control::get_instance().launch(boost::bind(invoke, spawn_function, this),
                                       stacksize,
                                       worker_affinity);
}

// Launches a fiber pinned to a single worker.
void fiber_group::launch(const boost::function<void (void)> &spawn_function,
                         size_t worker_affinity) {
  increment_running_counter();
  fiber_group::affinity_type affinity;
  affinity.set_bit(worker_affinity);
  fiber_control::get_instance().launch(boost::bind(invoke, spawn_function, this),
                                       stacksize,
                                       affinity);
}

// Blocks until every fiber launched through this group has completed.
// Only a single thread may be inside join() at any time.
void fiber_group::join() {
  join_lock.lock();
  // no one else is waiting
  ASSERT_EQ(join_waiting, false);
  // otherwise, we need to wait
  join_waiting = true;
  while(threads_running.value != 0) {
    join_cond.wait(join_lock);
  }
  join_waiting = false;
  join_lock.unlock();
}

} // namespace graphlab

================================================
FILE: src/graphlab/parallel/fiber_group.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_FIBER_GROUP_HPP
#define GRAPHLAB_FIBER_GROUP_HPP
#include <graphlab/parallel/fiber_control.hpp>
#include <graphlab/parallel/atomic.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
namespace graphlab {

/**
 * Defines a group of fibers. Analogous to the thread_group, but is meant
 * to run only little user-mode threads. It is important that fibers never
 * block, since there is no way to context switch out from a blocked fiber.
 * The fiber_group uses the fiber_control singleton instance to manage its
 * fibers.
 */
class fiber_group {
 public:
  typedef fiber_control::affinity_type affinity_type;

 private:
  size_t stacksize;       // stack size used for subsequently launched fibers
  affinity_type affinity; // default worker affinity for new fibers
  atomic<size_t> threads_running;
  mutex join_lock;
  // to be triggered once the threads_running counter becomes 0
  conditional join_cond;
  // set to true if someone is waiting on a join()
  bool join_waiting;

  inline void increment_running_counter() {
    threads_running.inc();
  }

  inline void decrement_running_counter() {
    // now, a bit of care is needed here
    size_t r = threads_running.dec();
    if (r == 0) {
      join_lock.lock();
      if (join_waiting) {
        join_cond.signal();
      }
      join_lock.unlock();
    }
  }

  // wraps the call so that we can do the appropriate termination
  static void invoke(const boost::function<void (void)>& spawn_function,
                     fiber_group* group);

 public:
  fiber_group(size_t stacksize = 8192,
              affinity_type affinity = fiber_control::all_affinity())
      : stacksize(stacksize), affinity(affinity), join_waiting(false) { }

  /**
   * Sets the stacksize of each fiber.
   * Only takes effect for threads launched after this.
   */
  inline void set_stacksize(size_t new_stacksize) {
    stacksize = new_stacksize;
  }

  /**
   * Sets the affinity for each fiber.
   * Only takes effect for threads launched after this.
   */
  inline void set_affinity(affinity_type new_affinity) {
    affinity = new_affinity;
  }

  /**
   * Launch a single thread which calls spawn_function.
   */
  void launch(const boost::function<void (void)> &spawn_function);

  /**
   * Launch a single thread which calls spawn_function with worker affinity.
   */
  void launch(const boost::function<void (void)> &spawn_function,
              affinity_type worker_affinity);

  /**
   * Launch a single thread which calls spawn_function with a single
   * thread affinity
   */
  void launch(const boost::function<void (void)> &spawn_function,
              size_t worker_affinity);

  /** Waits for all threads to complete execution.
   *  NOTE(review): an earlier comment claimed const char* exceptions are
   *  forwarded to join(); nothing in this class does that — do not rely
   *  on it.
   */
  void join();

  /// Returns the number of running threads.
  inline size_t running_threads() {
    return threads_running;
  }

  // //! Destructor. Waits for all threads to complete execution
  inline ~fiber_group(){ join(); }
};

} // namespace graphlab
#endif

================================================
FILE: src/graphlab/parallel/fiber_remote_request.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.  See the License for the specific language
 *  governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_FIBER_RPC_FUTURE_HPP
#define GRAPHLAB_FIBER_RPC_FUTURE_HPP
#include <graphlab/rpc/request_future.hpp>
#include <graphlab/rpc/request_reply_handler.hpp>
#include <graphlab/parallel/fiber_control.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
namespace graphlab {

/**
 * An implementation of the ireply_container interface
 * that will wait for rpc requests, but if the request is issued from within
 * a fiber, will deschedule the fiber.
 */
struct fiber_reply_container: public dc_impl::ireply_container {
  dc_impl::blob val;
  mutex lock;
  conditional cond;
  // if wait is in a fiber, this will contain the ID of the fiber to wake up
  // If 0, the wait is not in a fiber.
size_t waiting_tid; // true when the blob is assigned bool valready; fiber_reply_container():waiting_tid(0),valready(false) { } ~fiber_reply_container() { val.free(); } void wait() { if (fiber_control::in_fiber()) { // if I am in a fiber, use the deschedule mechanism lock.lock(); waiting_tid = fiber_control::get_tid(); while(!valready) { // set the waiting tid value // deschedule myself. This will deschedule the fiber // and unlock the lock atomically fiber_control::deschedule_self(&lock.m_mut); // unlock the condition variable, this does not re-lock the lock lock.lock(); } lock.unlock(); } else { // Otherwise use the condition variable waiting_tid = 0; lock.lock(); while(!valready) cond.wait(lock); lock.unlock(); } } void receive(procid_t source, dc_impl::blob b) { lock.lock(); val = b; valready = true; if (waiting_tid) { // it is a fiber. wake it up. fiber_control::schedule_tid(waiting_tid); } else { // not in fiber. This is just a condition signal cond.signal(); } lock.unlock(); } bool ready() const { return valready; } dc_impl::blob& get_blob() { return val; } }; #if DOXYGEN_DOCUMENTATION /** * \brief Performs a nonblocking RPC call to the target machine * to run the provided function pointer which has an expected return value. * * fiber_remote_request() calls the function "fn" on a target remote machine. * Provided arguments are serialized and sent to the target. * Therefore, all arguments are necessarily transmitted by value. * If the target function has a return value, it is sent back to calling * machine. fiber_remote_request() returns immediately a \ref * graphlab::request_future object which will allow you wait for the return * value. 
* * fiber_remote_request() has an identical interface to * \ref graphlab::distributed_control::future_remote_request() , but has the * additional capability that if a \ref graphlab::request_future::wait() is * called on the request while within a fiber, it deschedules the fiber and * context switches, returning only when the future is ready. This allows * the future to be used from within a fiber. * * Since this function is not a member of the distributed_control class, * it uses the function \ref distributed_control::get_instance() to obtain * the last instance of the distribute_control class created. This should be * sufficient for most use cases. * * \ref graphlab::object_fiber_remote_request is the version of this function * for remotely calling class member functions. * * Example: * \code * // A print function is defined * int add_one(int i) { * return i + 1; * } * * ... ... * // call the add_one function on machine 1 * int i = 10; * graphlab::request_future<int> ret = fiber_remote_request(1, add_one, i); * // this is safe to do within a fiber as it will not halt other fibers. * int result = ret(); * // result will be 11 * \endcode * * \see graphlab::distributed_control::remote_request * graphlab::distributed_control::future_remote_request * graphlab::object_fiber_remote_request * * \param targetmachine The ID of the machine to run the function on * \param fn The function to run on the target machine. Must be a pointer to * member function in the owning object. * \param ... The arguments to send to Fn. Arguments must be serializable. * and must be castable to the target types. * * \returns Returns a future templated around the same type as the return * value of the called function */ request_future<RetVal> fiber_remote_request(procid_t targetmachine, Fn fn, ...); /** * \brief Performs a nonblocking RPC call to the target machine * to run the provided function pointer which has an expected return value. 
*
 * object_fiber_remote_request() calls the function "fn" on a target remote machine.
 * Provided arguments are serialized and sent to the target.
 * Therefore, all arguments are necessarily transmitted by value.
 * If the target function has a return value, it is sent back to calling
 * machine. object_fiber_remote_request() returns immediately a \ref
 * graphlab::request_future object which will allow you to wait for the return
 * value.
 *
 * object_fiber_remote_request() has an identical interface to
 * \ref graphlab::dc_dist_object::future_remote_request() , but has the
 * additional capability that if a \ref graphlab::request_future::wait() is
 * called on the request while within a fiber, it deschedules the fiber and
 * context switches, returning only when the future is ready. This allows
 * the future to be used from within a fiber.
 *
 * Since this function is not a member of the \ref dc_dist_object class,
 * it needs to be provided a reference to the owning object's dc_dist_object.
 *
 * \ref graphlab::fiber_remote_request is the version of this function
 * for remotely calling global functions.
 *
 * Example:
 * \code
 * // A print function is defined in the distributed object
 * class distributed_obj_example {
 *   graphlab::dc_dist_object<distributed_obj_example> rmi;
 *   ... initialization and constructor ...
 *  private:
 *   int add_one(int i) {
 *     return i + 1;
 *   }
 *  public:
 *   int add_one_from_machine_1(int i) {
 *     // calls the add_one function on machine 1 with the argument i
 *     // this call returns immediately
 *     graphlab::request_future<int> future =
 *       object_fiber_remote_request(rmi, 1, &distributed_obj_example::add_one, i);
 *
 *     // ... we can do other stuff here
 *     // then when we want the answer
 *     // this is safe to do within a fiber as it will not halt other fibers.
* int result = future();
 *     return result;
 *   }
 * }
 * \endcode
 *
 * \see graphlab::dc_dist_object::remote_request
 * graphlab::dc_dist_object::future_remote_request
 * graphlab::fiber_remote_request
 *
 * \param rmiobj The dc_dist_object to use to send the request.
 * \param targetmachine The ID of the machine to run the function on
 * \param fn The function to run on the target machine. Must be a pointer to
 * member function in the owning object.
 * \param ... The arguments to send to Fn. Arguments must be serializable.
 * and must be castable to the target types.
 *
 * \returns Returns a future templated around the same type as the return
 * value of the called function
 */
request_future<RetVal> object_fiber_remote_request(dc_dist_object<T> rmiobj,
                                                   procid_t targetmachine,
                                                   Fn fn, ...);
#endif

#include <boost/preprocessor.hpp>
#include <graphlab/rpc/function_arg_types_def.hpp>

// Boost.Preprocessor helper macros used by the generators below:
// GENARGS expands to a typed parameter "Tn in"; GENI to the argument "in";
// GENT to the type "Tn"; GENARC serializes argument n into an archive.
#define GENARGS(Z,N,_)  BOOST_PP_CAT(T, N) BOOST_PP_CAT(i, N)
#define GENI(Z,N,_) BOOST_PP_CAT(i, N)
#define GENT(Z,N,_) BOOST_PP_CAT(T, N)
#define GENARC(Z,N,_) arc << BOOST_PP_CAT(i, N);

// Generates the fiber_remote_request() overloads taking 0..6 arguments;
// each issues a custom remote request backed by a fiber_reply_container.
#define REQUEST_INTERFACE_GENERATOR(Z,N,ARGS) \
  template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
  BOOST_PP_TUPLE_ELEM(1,0,ARGS) (procid_t target, \
                                 F remote_function BOOST_PP_COMMA_IF(N) \
                                 BOOST_PP_ENUM(N,GENARGS ,_) ) { \
    request_future<__GLRPC_FRESULT> reply(new fiber_reply_container); \
    distributed_control* dc = distributed_control::get_instance(); \
    ASSERT_NE(dc, NULL); \
    dc->custom_remote_request(target, reply.get_handle(), STANDARD_CALL, remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
    return reply; \
  }

BOOST_PP_REPEAT(7, REQUEST_INTERFACE_GENERATOR, (request_future<__GLRPC_FRESULT> fiber_remote_request) )

#include <graphlab/rpc/function_arg_types_undef.hpp>
#include <graphlab/rpc/mem_function_arg_types_def.hpp>

// Generates the object_fiber_remote_request() overloads taking 0..6
// arguments, issuing the request through the provided dc_dist_object.
#define OBJECT_REQUEST_INTERFACE_GENERATOR(Z,N,ARGS) \
  template<typename RMI, typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
  BOOST_PP_TUPLE_ELEM(1,0,ARGS) (RMI& rmi, \
                                 procid_t target, \
                                 F remote_function BOOST_PP_COMMA_IF(N) \
                                 BOOST_PP_ENUM(N,GENARGS ,_) ) { \
    request_future<__GLRPC_FRESULT> reply(new fiber_reply_container); \
    rmi.custom_remote_request(target, reply.get_handle(), STANDARD_CALL, remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
    return reply; \
  }

/* Generates the interface functions. 3rd argument is a tuple
   (interface name, issue name, flags) */
BOOST_PP_REPEAT(7, OBJECT_REQUEST_INTERFACE_GENERATOR, (request_future<__GLRPC_FRESULT> object_fiber_remote_request) )

#include <graphlab/rpc/mem_function_arg_types_undef.hpp>
#undef OBJECT_REQUEST_INTERFACE_GENERATOR
#undef REQUEST_INTERFACE_GENERATOR
#undef GENARC
#undef GENT
#undef GENI
#undef GENARGS

} // namespace graphlab
#endif

================================================
FILE: src/graphlab/parallel/lockfree_push_back.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.  See the License for the specific language
 *  governing permissions and limitations under the License.
*
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */
#ifndef GRAPHLAB_PARALLEL_LOCKFREE_PUSHBACK_HPP
#define GRAPHLAB_PARALLEL_LOCKFREE_PUSHBACK_HPP
#include <graphlab/parallel/atomic.hpp>

namespace graphlab {
namespace lockfree_push_back_impl {

  // Combined writer reference count and insertion cursor used by
  // lockfree_push_back.  Bit 16 (MAX_REF) of reference_count acts as a
  // "resize in progress" flag (toggled by flag_ref()); bits 0..15 count
  // threads currently writing into the container.
  struct idx_ref {
    idx_ref(): reference_count(0), idx(0) { }
    idx_ref(size_t idx): reference_count(0), idx(idx) { }

    // low 16 bits: number of in-flight writers; bit 16: resize flag
    volatile int reference_count;
    // next free slot in the container (insertion cursor)
    atomic<size_t> idx;
    enum { MAX_REF = 65536 };

    // Spins until the resize flag (bit 16) is clear, then atomically
    // registers one more writer via CAS.
    inline void inc_ref() {
      while (1) {
        int curref = reference_count;
        if ((curref & MAX_REF) == 0 &&
            atomic_compare_and_swap(reference_count, curref, curref + 1)) {
          break;
        }
      }
    }

    // Busy-waits until no writers remain. Only the low 16 bits are
    // tested, so the resize flag itself does not block this wait.
    inline void wait_till_no_ref() {
      while((reference_count & (MAX_REF - 1)) != 0);
    }

    // Releases one writer reference.
    inline void dec_ref() {
      __sync_fetch_and_sub(&reference_count, 1);
    }

    // Toggles the resize-in-progress flag (bit 16). Called once to set
    // it before a resize and once to clear it afterwards.
    inline void flag_ref() {
      __sync_fetch_and_xor(&reference_count, MAX_REF);
    }

    // Atomically claims one slot; returns the claimed index.
    inline size_t inc_idx() {
      return idx.inc_ret_last();
    }

    // Atomically claims n consecutive slots; returns the first index.
    inline size_t inc_idx(size_t n) {
      return idx.inc_ret_last(n);
    }
  };

} // lockfree_push_back_impl

/**
 * Provides a lock free way to insert elements to the end
 * of a container. Container must provide 3 functions.
 *  - T& operator[](size_t idx)
 *  - void resize(size_t len)
 *  - size_t size()
 *
 * resize(n) must guarantee that size() >= n.
 * T& operator[](size_t idx) must succeed for idx < size() and must be
 * safely executable in parallel.
 * size() must be safely executable in parallel with resize().
*/
template <typename Container, typename T = typename Container::value_type>
class lockfree_push_back {
 private:
  Container& container;                  // target container; not owned
  lockfree_push_back_impl::idx_ref cur;  // writer refcount + next-slot cursor
  mutex mut;                             // serializes resize operations
  float scalefactor;                     // growth factor applied on resize

 public:
  /// startidx is the first slot push_back() will fill.
  lockfree_push_back(Container& container, size_t startidx, float scalefactor = 2):
      container(container),cur(startidx), scalefactor(scalefactor) { }

  /// Current insertion cursor: number of slots claimed so far.
  size_t size() const {
    return cur.idx.value;
  }

  /// Unsynchronized reset of the insertion cursor. Not thread safe.
  void set_size(size_t s) {
    cur.idx.value = s;
  }

  /**
   * Appends the range [begin, end). Returns the index one past the last
   * inserted element. If the claimed slots exceed the container's size,
   * one thread takes the resize mutex, flags the reference count to stall
   * new writers, waits for in-flight writers to drain, then resizes.
   */
  template <typename Iterator>
  size_t push_back(Iterator begin, Iterator end) {
    size_t numel = std::distance(begin, end);
    size_t putpos = cur.inc_idx(numel);  // claim numel consecutive slots
    size_t endidx = putpos + numel;
    while(1) {
      cur.inc_ref();
      if (endidx <= container.size()) {
        // fast path: slots already exist; write while holding a reference
        while(putpos < endidx) {
          container[putpos] = (*begin);
          ++putpos;
          ++begin;
        }
        cur.dec_ref();
        break;
      } else {
        cur.dec_ref();
        if (mut.try_lock()) {
          // ok. we need to resize
          // flag the reference and wait till there are no more references
          cur.flag_ref();
          cur.wait_till_no_ref();
          // we are exclusive here. resize
          if (endidx > container.size()) {
            container.resize(std::max<size_t>(endidx, container.size() * scalefactor));
          }
          while(putpos < endidx) {
            container[putpos] = (*begin);
            ++putpos;
            ++begin;
          }
          cur.flag_ref();
          mut.unlock();
          break;
        }
        // lost the race for the resize lock; loop and retry the fast path
      }
    }
    return putpos;
  }

  /// Thread-safe read of slot `item`; false if item is past the cursor.
  bool query(size_t item, T& value) {
    bool ret = false;
    cur.inc_ref();
    if (item < cur.idx) {
      value = container[item];
      ret = true;
    }
    cur.dec_ref();
    return ret;
  }

  /// Thread-safe pointer to slot `item`, or NULL if past the cursor.
  T* query(size_t item) {
    T* ret = NULL;
    cur.inc_ref();
    if (item < cur.idx) {
      ret = &(container[item]);
    }
    cur.dec_ref();
    return ret;
  }

  /// Like query() but without taking a writer reference; caller must
  /// guarantee no concurrent resize is possible.
  bool query_unsafe(size_t item, T& value) {
    bool ret = false;
    if (item < cur.idx) {
      value = container[item];
      ret = true;
    }
    return ret;
  }

  T* query_unsafe(size_t item) {
    T* ret = NULL;
    if (item < cur.idx) {
      ret = &(container[item]);
    }
    return ret;
  }

  /// Appends a single element; returns the index it was stored at.
  size_t push_back(const T& t) {
    size_t putpos = cur.inc_idx();  // claim one slot
    while(1) {
      cur.inc_ref();
      if (putpos < container.size()) {
        container[putpos] = t;
        cur.dec_ref();
        break;
      } else {
        cur.dec_ref();
        if
(mut.try_lock()) {
          // ok. we need to resize
          // flag the reference and wait till there are no more references
          cur.flag_ref();
          cur.wait_till_no_ref();
          // we are exclusive here. resize
          if (putpos >= container.size()) {
            container.resize(std::max<size_t>(putpos + 1, container.size() * scalefactor));
          }
          container[putpos] = t;
          cur.flag_ref();
          mut.unlock();
          break;
        }
      }
    }
    return putpos;
  }
};

} // namespace graphlab
#endif


================================================
FILE: src/graphlab/parallel/mutex.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */
#ifndef GRAPHLAB_MUTEX_HPP
#define GRAPHLAB_MUTEX_HPP

#include <pthread.h>
#include <graphlab/logger/assertions.hpp>

namespace graphlab {

  /**
   * \ingroup util
   *
   * Simple wrapper around pthread's mutex.
   * Before you use, see \ref parallel_object_intricacies.
   */
  class mutex {
  public:
    // mutable not actually needed
    mutable pthread_mutex_t m_mut;
    /// constructs a mutex
    mutex() {
      int error = pthread_mutex_init(&m_mut, NULL);
      ASSERT_TRUE(!error);
    }
    /** Copy constructor which does not copy. Do not use!
        Required for compatibility with some STL implementations (LLVM),
        which use the copy constructor for vector resize,
        rather than the standard constructor.
     */
    mutex(const mutex&) {
      int error = pthread_mutex_init(&m_mut, NULL);
      ASSERT_TRUE(!error);
    }
    ~mutex(){
      int error = pthread_mutex_destroy( &m_mut );
      ASSERT_TRUE(!error);
    }
    // not copyable
    void operator=(const mutex& m) { }
    /// Acquires a lock on the mutex
    inline void lock() const {
      int error = pthread_mutex_lock( &m_mut );
      // if (error) std::cout << "mutex.lock() error: " << error << std::endl;
      ASSERT_TRUE(!error);
    }
    /// Releases a lock on the mutex
    inline void unlock() const {
      int error = pthread_mutex_unlock( &m_mut );
      ASSERT_TRUE(!error);
    }
    /// Non-blocking attempt to acquire a lock on the mutex
    inline bool try_lock() const {
      return pthread_mutex_trylock( &m_mut ) == 0;
    }
    // conditional needs direct access to m_mut for pthread_cond_wait
    friend class conditional;
  }; // End of Mutex

  /**
   * \ingroup util
   *
   * Simple wrapper around pthread's recursive mutex.
   * Before you use, see \ref parallel_object_intricacies.
   */
  class recursive_mutex {
  public:
    // mutable not actually needed
    mutable pthread_mutex_t m_mut;
    /// constructs a mutex
    recursive_mutex() {
      pthread_mutexattr_t attr;
      int error = pthread_mutexattr_init(&attr);
      ASSERT_TRUE(!error);
      error = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
      ASSERT_TRUE(!error);
      error = pthread_mutex_init(&m_mut, &attr);
      ASSERT_TRUE(!error);
      pthread_mutexattr_destroy(&attr);
    }
    /** Copy constructor which does not copy. Do not use!
        Required for compatibility with some STL implementations (LLVM),
        which use the copy constructor for vector resize,
        rather than the standard constructor.
*/
    recursive_mutex(const recursive_mutex&) {
      pthread_mutexattr_t attr;
      int error = pthread_mutexattr_init(&attr);
      ASSERT_TRUE(!error);
      error = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
      ASSERT_TRUE(!error);
      error = pthread_mutex_init(&m_mut, &attr);
      ASSERT_TRUE(!error);
      pthread_mutexattr_destroy(&attr);
    }
    ~recursive_mutex(){
      int error = pthread_mutex_destroy( &m_mut );
      ASSERT_TRUE(!error);
    }
    // not copyable
    void operator=(const recursive_mutex& m) { }
    /// Acquires a lock on the mutex; the owning thread may lock repeatedly
    inline void lock() const {
      int error = pthread_mutex_lock( &m_mut );
      // if (error) std::cout << "mutex.lock() error: " << error << std::endl;
      ASSERT_TRUE(!error);
    }
    /// Releases a lock on the mutex
    inline void unlock() const {
      int error = pthread_mutex_unlock( &m_mut );
      ASSERT_TRUE(!error);
    }
    /// Non-blocking attempt to acquire a lock on the mutex
    inline bool try_lock() const {
      return pthread_mutex_trylock( &m_mut ) == 0;
    }
    friend class conditional;
  }; // End of recursive_mutex

} // end of graphlab namespace
#endif


================================================
FILE: src/graphlab/parallel/parallel_includes.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ // #include <atomic.hpp> // #include <graphlab/parallel/pthread_tools.hpp> ================================================ FILE: src/graphlab/parallel/pthread_tools.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <graphlab/parallel/pthread_tools.hpp> #include <boost/bind.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { // Some magic to ensure that keys are created at program startup =========> void destroy_tls_data(void* ptr); struct thread_keys { pthread_key_t GRAPHLAB_TSD_ID; thread_keys() : GRAPHLAB_TSD_ID(0) { pthread_key_create(&GRAPHLAB_TSD_ID, destroy_tls_data); } }; // This function is to be called prior to any thread starting // execution to ensure that the static member keys is constructed // prior to any threads launching static pthread_key_t get_tsd_id() { static thread_keys keys; return keys.GRAPHLAB_TSD_ID; } // This forces get_tsd_id to be called prior to main. 
static pthread_key_t __unused_init_keys__(get_tsd_id()); // the combination of the two mechanisms above will force the // thread local store to be initialized // 1: before main // 2: before any other global variables which spawn threads // END MAGIC =============================================================> // ----------------------------------------------------------------- // Thread Object Static Members // ----------------------------------------------------------------- /** * Create thread specific data */ thread::tls_data* create_tls_data(size_t thread_id = 0) { // Require that the data not yet exist assert(pthread_getspecific(get_tsd_id()) == NULL); // Create the data thread::tls_data* data = new thread::tls_data(thread_id); assert(data != NULL); // Set the data pthread_setspecific(get_tsd_id(), data); // Return the associated tsd return data; } // end create the thread specific data /** * This function tries to get the thread specific data. If no * thread specific data has been associated with the thread than it * is created. */ thread::tls_data& thread::get_tls_data() { // get the tsd tls_data* tsd = reinterpret_cast<tls_data*> (pthread_getspecific(get_tsd_id())); // If no tsd be has been associated, create one if(tsd == NULL) tsd = create_tls_data(); assert(tsd != NULL); return *tsd; } // end of get thread specific data /** * Create thread specific data */ void destroy_tls_data(void* ptr) { thread::tls_data* tsd = reinterpret_cast<thread::tls_data*>(ptr); if(tsd != NULL) { delete tsd; } } // end destroy the thread specific data //! Little helper function used to launch threads void* thread::invoke(void *_args) { void* retval = NULL; thread::invoke_args* args = static_cast<thread::invoke_args*>(_args); // Create the graphlab thread specific data create_tls_data(args->m_thread_id); //! Run the users thread code try { args->spawn_routine(); } catch (const char* msg) { retval = (void*)msg; } //! Delete the arguments delete args; //! 
Properly kill the thread thread_destroy_callback(); return retval; } // end of invoke /** * This static method joins the invoking thread with the other * thread object. This thread will not return from the join * routine until the other thread complets it run. */ void thread::join(thread& other) { void *status = NULL; // joint the first element int error = 0; if(other.active()) { error = pthread_join( other.m_p_thread, &status); if (status != NULL) { const char* strstatus = (const char*) status; throw strstatus; } } if(error) { std::cout << "Major error in join" << std::endl; std::cout << "pthread_join() returned error " << error << std::endl; exit(EXIT_FAILURE); } } // end of join /** * Return the number processing units (individual cores) on this * system */ size_t thread::cpu_count() { #if defined __linux__ char* jobsStr = getenv("GRAPHLAB_THREADS_PER_WORKER"); if (jobsStr) { int nThreads = atoi(jobsStr); if ( nThreads < 2 ) return 2; else return nThreads; } else { return sysconf(_SC_NPROCESSORS_CONF); } #elif defined(__MACH__) && defined(_SC_NPROCESSORS_ONLN) return sysconf (_SC_NPROCESSORS_ONLN); #elif defined(__MACH__) && defined(HW_NCPU) int ncpus = 1; size_t len = sizeof(ncpus); sysctl((int[2]) {CTL_HW, HW_NCPU}, 2, &ncpus, &len, NULL, 0); return ncpus; #else return 0; #endif } // end of cpu count /** * Allow defining a callback when thread is destroyed. * This is needed at least from Java JNI, where we have to detach * thread from JVM before it dies. 
*/ void (*__thr_callback)() = NULL; void thread::thread_destroy_callback() { if (__thr_callback != NULL) __thr_callback(); } void thread::set_thread_destroy_callback(void (*callback)()) { __thr_callback = callback; } // ----------------------------------------------------------------- // Thread Object Public Members // ----------------------------------------------------------------- void thread::launch(const boost::function<void (void)> &spawn_routine) { get_tsd_id(); ASSERT_FALSE(thread_started); // fill in the thread attributes pthread_attr_t attr; int error = 0; error = pthread_attr_init(&attr); ASSERT_TRUE(!error); error = pthread_attr_setstacksize(&attr, m_stack_size); ASSERT_TRUE(!error); error = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); ASSERT_TRUE(!error); error = pthread_create(&m_p_thread, &attr, invoke, static_cast<void*>(new invoke_args(m_thread_id, spawn_routine)) ); thread_started = true; if(error) { std::cout << "Major error in thread_group.launch (pthread_create). 
Error: " << error << std::endl; exit(EXIT_FAILURE); } // destroy the attribute object error = pthread_attr_destroy(&attr); ASSERT_TRUE(!error); } void thread::launch(const boost::function<void (void)> &spawn_routine, size_t cpu_id){ get_tsd_id(); // if this is not a linux based system simply invoke start and // return; #ifndef __linux__ launch(spawn_routine); return; #else ASSERT_FALSE(thread_started); if (cpu_id == size_t(-1)) { launch(spawn_routine); return; } if (cpu_count() > 0) { cpu_id = cpu_id % cpu_count(); } else { // unknown CPU count launch(spawn_routine); return; } // fill in the thread attributes pthread_attr_t attr; int error = 0; error = pthread_attr_init(&attr); ASSERT_TRUE(!error); error = pthread_attr_setstacksize(&attr, m_stack_size); ASSERT_TRUE(!error); error = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); ASSERT_TRUE(!error); #ifdef HAS_SET_AFFINITY // Set Processor Affinity masks (linux only) cpu_set_t cpu_set; CPU_ZERO(&cpu_set); CPU_SET(cpu_id % CPU_SETSIZE, &cpu_set); pthread_attr_setaffinity_np(&attr, sizeof(cpu_set), &cpu_set); #endif // Launch the thread error = pthread_create(&m_p_thread, &attr, invoke, static_cast<void*>(new invoke_args(m_thread_id, spawn_routine))); thread_started = true; if(error) { std::cout << "Major error in thread_group.launch" << std::endl; std::cout << "pthread_create() returned error " << error << std::endl; exit(EXIT_FAILURE); } // destroy the attribute object error = pthread_attr_destroy(&attr); ASSERT_TRUE(!error); #endif } // ----------------------------------------------------------------- // Thread Group Object Public Members // ----------------------------------------------------------------- // thread group exception forwarding is a little more complicated // because it has to be able to catch it on a bunch of threads void thread_group::invoke(boost::function<void (void)> spawn_function, thread_group *group) { const char* retval = NULL; try { spawn_function(); } catch (const char* c) { 
// signal the thread group to join this thread retval = c; } group->mut.lock(); group->joinqueue.push(std::make_pair(pthread_self(), retval)); group->cond.signal(); group->mut.unlock(); } void thread_group::launch(const boost::function<void (void)> &spawn_function) { // Create a thread object and launch it. // We do not need to keep a copy of the thread around thread local_thread(m_thread_counter++); mut.lock(); threads_running++; mut.unlock(); local_thread.launch(boost::bind(thread_group::invoke, spawn_function, this)); } void thread_group::launch(const boost::function<void (void)> &spawn_function, size_t cpu_id) { if (cpu_id == size_t(-1)) { launch(spawn_function); return; } // Create a thread object thread local_thread(m_thread_counter++); mut.lock(); threads_running++; mut.unlock(); local_thread.launch(boost::bind(thread_group::invoke, spawn_function, this), cpu_id); } // end of launch void thread_group::join() { mut.lock(); while(threads_running > 0) { // if no threads are joining. wait while (joinqueue.empty()) cond.wait(mut); // a thread is joining std::pair<pthread_t, const char*> joining_thread = joinqueue.front(); joinqueue.pop(); threads_running--; // Reset the thread counter after killing all threads if(threads_running == 0) m_thread_counter = 0; // unlock here since I might be in join for a little while mut.unlock(); void *unusedstatus = NULL; pthread_join(joining_thread.first, &unusedstatus); // if there is a return value // throw it. It is safe to throw here since I have the mutex unlocked. if (joining_thread.second) { throw(joining_thread.second); } mut.lock(); } mut.unlock(); } // end of join } // end of namespace graphlab ================================================ FILE: src/graphlab/parallel/pthread_tools.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_PTHREAD_TOOLS_HPP #define GRAPHLAB_PTHREAD_TOOLS_HPP #include <cstdlib> #include <pthread.h> #include <semaphore.h> #include <sched.h> #include <signal.h> #include <sys/time.h> #include <vector> #include <list> #include <queue> #include <iostream> #include <boost/function.hpp> #include <graphlab/logger/assertions.hpp> #include <graphlab/parallel/atomic_ops.hpp> #include <graphlab/util/generics/any.hpp> #include <graphlab/util/branch_hints.hpp> #include <boost/unordered_map.hpp> #undef _POSIX_SPIN_LOCKS #define _POSIX_SPIN_LOCKS -1 #include <graphlab/parallel/mutex.hpp> namespace graphlab { #if _POSIX_SPIN_LOCKS >= 0 /** * \ingroup util * * Wrapper around pthread's spinlock. * * Before you use, see \ref parallel_object_intricacies. */ class spinlock { private: // mutable not actually needed mutable pthread_spinlock_t m_spin; public: /// constructs a spinlock spinlock () { int error = pthread_spin_init(&m_spin, PTHREAD_PROCESS_PRIVATE); ASSERT_TRUE(!error); } /** Copy constructor which does not copy. Do not use! Required for compatibility with some STL implementations (LLVM). which use the copy constructor for vector resize, rather than the standard constructor. 
*/
    spinlock(const spinlock&) {
      int error = pthread_spin_init(&m_spin, PTHREAD_PROCESS_PRIVATE);
      ASSERT_TRUE(!error);
    }
    // not copyable
    void operator=(const spinlock& m) { }
    /// Acquires a lock on the spinlock
    inline void lock() const {
      int error = pthread_spin_lock( &m_spin );
      ASSERT_TRUE(!error);
    }
    /// Releases a lock on the spinlock
    inline void unlock() const {
      int error = pthread_spin_unlock( &m_spin );
      ASSERT_TRUE(!error);
    }
    /// Non-blocking attempt to acquire a lock on the spinlock
    inline bool try_lock() const {
      return pthread_spin_trylock( &m_spin ) == 0;
    }
    ~spinlock(){
      int error = pthread_spin_destroy( &m_spin );
      ASSERT_TRUE(!error);
    }
    friend class conditional;
  }; // End of spinlock
#define SPINLOCK_SUPPORTED 1
#else
  //! if spinlock not supported, it is typedef it to a mutex.
  typedef mutex spinlock;
#define SPINLOCK_SUPPORTED 0
#endif

  /**
   * \ingroup util
   * If pthread spinlock is not implemented,
   * this provides a simple alternate spin lock implementation.
   *
   * Before you use, see \ref parallel_object_intricacies.
   */
  class simple_spinlock {
  private:
    // mutable not actually needed
    mutable volatile char spinner;
  public:
    /// constructs a spinlock
    simple_spinlock () {
      spinner = 0;
    }
    /** Copy constructor which does not copy. Do not use!
        Required for compatibility with some STL implementations (LLVM),
        which use the copy constructor for vector resize,
        rather than the standard constructor.
     */
    simple_spinlock(const simple_spinlock&) {
      spinner = 0;
    }
    // not copyable
    void operator=(const simple_spinlock& m) { }

    /// Acquires a lock on the spinlock
    inline void lock() const {
      // test-and-test-and-set: spin on a plain read first so the atomic
      // exchange is only attempted when the lock looks free
      while(spinner == 1 || __sync_lock_test_and_set(&spinner, 1));
    }
    /// Releases a lock on the spinlock
    inline void unlock() const {
      __sync_synchronize();
      spinner = 0;
    }
    /// Non-blocking attempt to acquire a lock on the spinlock
    inline bool try_lock() const {
      return (__sync_lock_test_and_set(&spinner, 1) == 0);
    }
    ~simple_spinlock(){
      ASSERT_TRUE(spinner == 0);
    }
  };

  /**
   * \ingroup util
   * If pthread spinlock is not implemented,
   * this provides a simple alternate spin lock implementation.
   *
   * Before you use, see \ref parallel_object_intricacies.
   */
  class padded_simple_spinlock {
  private:
    // mutable not actually needed
    mutable volatile char spinner;
    // char padding[63];
  public:
    /// constructs a spinlock
    padded_simple_spinlock () {
      spinner = 0;
    }
    /** Copy constructor which does not copy. Do not use!
        Required for compatibility with some STL implementations (LLVM),
        which use the copy constructor for vector resize,
        rather than the standard constructor.
     */
    padded_simple_spinlock(const padded_simple_spinlock&) {
      spinner = 0;
    }
    // not copyable
    void operator=(const padded_simple_spinlock& m) { }

    /// Acquires a lock on the spinlock
    inline void lock() const {
      while(spinner == 1 || __sync_lock_test_and_set(&spinner, 1));
    }
    /// Releases a lock on the spinlock
    inline void unlock() const {
      __sync_synchronize();
      spinner = 0;
    }
    /// Non-blocking attempt to acquire a lock on the spinlock
    inline bool try_lock() const {
      return (__sync_lock_test_and_set(&spinner, 1) == 0);
    }
    ~padded_simple_spinlock(){
      ASSERT_TRUE(spinner == 0);
    }
  };

  /**
   * \ingroup util
   * Wrapper around pthread's condition variable
   *
   * Before you use, see \ref parallel_object_intricacies.
*/ class conditional { private: mutable pthread_cond_t m_cond; public: conditional() { int error = pthread_cond_init(&m_cond, NULL); ASSERT_TRUE(!error); } /** Copy constructor which does not copy. Do not use! Required for compatibility with some STL implementations (LLVM). which use the copy constructor for vector resize, rather than the standard constructor. */ conditional(const conditional &) { int error = pthread_cond_init(&m_cond, NULL); ASSERT_TRUE(!error); } // not copyable void operator=(const conditional& m) { } /// Waits on condition. The mutex must already be acquired. Caller /// must be careful about spurious wakes. inline void wait(const mutex& mut) const { int error = pthread_cond_wait(&m_cond, &mut.m_mut); ASSERT_TRUE(!error); } /// Like wait() but with a time limit of "sec" seconds inline int timedwait(const mutex& mut, size_t sec) const { struct timespec timeout; struct timeval tv; struct timezone tz; gettimeofday(&tv, &tz); timeout.tv_nsec = tv.tv_usec * 1000; timeout.tv_sec = tv.tv_sec + (time_t)sec; return pthread_cond_timedwait(&m_cond, &mut.m_mut, &timeout); } /// Like wait() but with a time limit of "ms" milliseconds inline int timedwait_ms(const mutex& mut, size_t ms) const { struct timespec timeout; struct timeval tv; gettimeofday(&tv, NULL); // convert ms to s and ns size_t s = ms / 1000; ms = ms % 1000; size_t ns = ms * 1000000; // convert timeval to timespec timeout.tv_nsec = tv.tv_usec * 1000; timeout.tv_sec = tv.tv_sec; // add the time timeout.tv_nsec += (suseconds_t)ns; timeout.tv_sec += (time_t)s; // shift the nsec to sec if overflow if (timeout.tv_nsec > 1000000000) { timeout.tv_sec ++; timeout.tv_nsec -= 1000000000; } return pthread_cond_timedwait(&m_cond, &mut.m_mut, &timeout); } /// Like wait() but with a time limit of "ns" nanoseconds inline int timedwait_ns(const mutex& mut, size_t ns) const { struct timespec timeout; struct timeval tv; gettimeofday(&tv, NULL); assert(ns > 0); // convert ns to s and ns size_t s = ns / 1000000; 
ns = ns % 1000000; // convert timeval to timespec timeout.tv_nsec = tv.tv_usec * 1000; timeout.tv_sec = tv.tv_sec; // add the time timeout.tv_nsec += (suseconds_t)ns; timeout.tv_sec += (time_t)s; // shift the nsec to sec if overflow if (timeout.tv_nsec > 1000000000) { timeout.tv_sec ++; timeout.tv_nsec -= 1000000000; } return pthread_cond_timedwait(&m_cond, &mut.m_mut, &timeout); } /// Signals one waiting thread to wake up inline void signal() const { int error = pthread_cond_signal(&m_cond); ASSERT_TRUE(!error); } /// Wakes up all waiting threads inline void broadcast() const { int error = pthread_cond_broadcast(&m_cond); ASSERT_TRUE(!error); } ~conditional() { int error = pthread_cond_destroy(&m_cond); ASSERT_TRUE(!error); } }; // End conditional #ifdef __APPLE__ /** * Custom implementation of a semaphore. * * Before you use, see \ref parallel_object_intricacies. */ class semaphore { private: conditional cond; mutex mut; mutable volatile size_t semvalue; mutable volatile size_t waitercount; public: semaphore() { semvalue = 0; waitercount = 0; } /** Copy constructor which does not copy. Do not use! Required for compatibility with some STL implementations (LLVM). which use the copy constructor for vector resize, rather than the standard constructor. */ semaphore(const semaphore&) { semvalue = 0; waitercount = 0; } // not copyable void operator=(const semaphore& m) { } inline void post() const { mut.lock(); if (waitercount > 0) { cond.signal(); } semvalue++; mut.unlock(); } inline void wait() const { mut.lock(); waitercount++; while (semvalue == 0) { cond.wait(mut); } waitercount--; semvalue--; mut.unlock(); } ~semaphore() { ASSERT_TRUE(waitercount == 0); ASSERT_TRUE(semvalue == 0); } }; // End semaphore #else /** * Wrapper around pthread's semaphore * * Before you use, see \ref parallel_object_intricacies. 
*/
  class semaphore {
  private:
    mutable sem_t m_sem;
  public:
    semaphore() {
      int error = sem_init(&m_sem, 0,0);
      ASSERT_TRUE(!error);
    }
    /** Copy constructor which does not copy. Do not use!
        Required for compatibility with some STL implementations (LLVM),
        which use the copy constructor for vector resize,
        rather than the standard constructor.
     */
    semaphore(const semaphore&) {
      int error = sem_init(&m_sem, 0,0);
      ASSERT_TRUE(!error);
    }
    // not copyable
    void operator=(const semaphore& m) { }
    /// Increments the semaphore, waking one waiter if any.
    inline void post() const {
      int error = sem_post(&m_sem);
      ASSERT_TRUE(!error);
    }
    /// Blocks until the semaphore value is positive, then decrements it.
    inline void wait() const {
      int error = sem_wait(&m_sem);
      ASSERT_TRUE(!error);
    }
    ~semaphore() {
      int error = sem_destroy(&m_sem);
      ASSERT_TRUE(!error);
    }
  }; // End semaphore
#endif

// local shorthands for the GCC __sync builtins; #undef'd below
#define atomic_xadd(P, V) __sync_fetch_and_add((P), (V))
#define cmpxchg(P, O, N) __sync_val_compare_and_swap((P), (O), (N))
#define atomic_inc(P) __sync_add_and_fetch((P), 1)
#define atomic_add(P, V) __sync_add_and_fetch((P), (V))
#define atomic_set_bit(P, V) __sync_or_and_fetch((P), 1<<(V))
#define cpu_relax() asm volatile("pause\n": : :"memory")

  /**
   * \class spinrwlock
   * rwlock built around "spinning"
   * source adapted from http://locklessinc.com/articles/locks/
   * "Scalable Reader-Writer Synchronization for Shared-Memory Multiprocessors"
   * John Mellor-Crummey and Michael Scott
   */
  class spinrwlock {
    // ticket lock state: write/read are the currently-serviced tickets,
    // users is the next ticket to hand out; u/us provide word-sized views
    union rwticket {
      unsigned u;
      unsigned short us;
      __extension__ struct {
        unsigned char write;
        unsigned char read;
        unsigned char users;
      } s;
    };
    mutable bool writing;
    mutable volatile rwticket l;
  public:
    spinrwlock() {
      memset(const_cast<rwticket*>(&l), 0, sizeof(rwticket));
    }
    inline void writelock() const {
      // take a ticket (increments "users") and wait until it becomes
      // the active write ticket
      unsigned me = atomic_xadd(&l.u, (1<<16));
      unsigned char val = (unsigned char)(me >> 16);
      while (val != l.s.write) asm volatile("pause\n": : :"memory");
      writing = true;
    }
    inline void wrunlock() const{
      // advance both the read and write active tickets together
      rwticket t = *const_cast<rwticket*>(&l);
      t.s.write++;
      t.s.read++;
      *(volatile unsigned short *) (&l) = t.us;
      writing = false;
      __asm("mfence");
    }
    inline void readlock() const {
      // take a ticket and wait for it to become the active read ticket;
      // then pass the read baton to the next ticket holder immediately
      unsigned me = atomic_xadd(&l.u, (1<<16));
      unsigned char val = (unsigned char)(me >> 16);
      while (val != l.s.read) asm volatile("pause\n": : :"memory");
      l.s.read++;
    }
    inline void rdunlock() const {
      atomic_inc(&l.s.write);
    }
    /// Releases whichever lock mode (tracked via "writing") is held.
    inline void unlock() const {
      if (!writing) rdunlock();
      else wrunlock();
    }
  };

#define RW_WAIT_BIT 0
#define RW_WRITE_BIT 1
#define RW_READ_BIT 2

#define RW_WAIT 1
#define RW_WRITE 2
#define RW_READ 4

  struct spinrwlock2 {
    // bit 0: writer waiting; bit 1: writer active; bits >= 2: reader count
    mutable unsigned int l;

    spinrwlock2():l(0) {}
    void writelock() const {
      while (1) {
        unsigned state = l;
        /* No readers or writers? */
        if (state < RW_WRITE) {
          /* Turn off RW_WAIT, and turn on RW_WRITE */
          if (cmpxchg(&l, state, RW_WRITE) == state) return;
          /* Someone else got there... time to wait */
          state = l;
        }
        /* Turn on writer wait bit */
        if (!(state & RW_WAIT)) atomic_set_bit(&l, RW_WAIT_BIT);
        /* Wait until can try to take the lock */
        while (l > RW_WAIT) cpu_relax();
      }
    }
    void wrunlock() const {
      atomic_add(&l, -RW_WRITE);
    }
    void readlock() const {
      while (1) {
        /* A writer exists? */
        while (l & (RW_WAIT | RW_WRITE)) cpu_relax();
        /* Try to get read lock */
        if (!(atomic_xadd(&l, RW_READ) & (RW_WAIT | RW_WRITE))) return;
        /* Undo */
        atomic_add(&l, -RW_READ);
      }
    }
    void rdunlock() const {
      atomic_add(&l, -RW_READ);
    }
  };

#undef atomic_xadd
#undef cmpxchg
#undef atomic_inc
#undef atomic_set_bit
#undef atomic_add
#undef RW_WAIT_BIT
#undef RW_WRITE_BIT
#undef RW_READ_BIT
#undef RW_WAIT
#undef RW_WRITE
#undef RW_READ

  /**
   * \class rwlock
   * Wrapper around pthread's rwlock
   *
   * Before you use, see \ref parallel_object_intricacies.
   */
  class rwlock {
  private:
    mutable pthread_rwlock_t m_rwlock;
  public:
    rwlock() {
      int error = pthread_rwlock_init(&m_rwlock, NULL);
      ASSERT_TRUE(!error);
    }
    ~rwlock() {
      int error = pthread_rwlock_destroy(&m_rwlock);
      ASSERT_TRUE(!error);
    }
    // not copyable
    void operator=(const rwlock& m) { }
    /**
     * \todo: Remove!
     *
     * Copy constructor which does not copy. Do not use!  Required for
     * compatibility with some STL implementations (LLVM), which use
     * the copy constructor for vector resize, rather than the
     * standard constructor.
     */
    rwlock(const rwlock &) {
      int error = pthread_rwlock_init(&m_rwlock, NULL);
      ASSERT_TRUE(!error);
    }
    inline void readlock() const {
      pthread_rwlock_rdlock(&m_rwlock);
      //ASSERT_TRUE(!error);
    }
    inline void writelock() const {
      pthread_rwlock_wrlock(&m_rwlock);
      //ASSERT_TRUE(!error);
    }
    inline bool try_readlock() const {
      return pthread_rwlock_tryrdlock(&m_rwlock) == 0;
    }
    inline bool try_writelock() const {
      return pthread_rwlock_trywrlock(&m_rwlock) == 0;
    }
    inline void unlock() const {
      pthread_rwlock_unlock(&m_rwlock);
      //ASSERT_TRUE(!error);
    }
    inline void rdunlock() const {
      unlock();
    }
    inline void wrunlock() const {
      unlock();
    }
  }; // End rwlock

  /**
   * \ingroup util
   * This is a simple sense-reversing barrier implementation.
   * In addition to standard barrier functionality, this also
   * provides a "cancel" function which can be used to destroy
   * the barrier, releasing all threads stuck in the barrier.
   *
   * Before you use, see \ref parallel_object_intricacies.
*/ class cancellable_barrier { private: graphlab::mutex mutex; graphlab::conditional conditional; mutable int needed; mutable int called; mutable bool barrier_sense; mutable bool barrier_release; bool alive; // not copyconstructible cancellable_barrier(const cancellable_barrier&) { } public: /// Construct a barrier which will only fall when numthreads enter cancellable_barrier(size_t numthreads) { needed = numthreads; called = 0; barrier_sense = false; barrier_release = true; alive = true; } // not copyable void operator=(const cancellable_barrier& m) { } void resize_unsafe(size_t numthreads) { needed = numthreads; } /** * \warning: This barrier is safely NOT reusable with this cancel * definition */ inline void cancel() { alive = false; conditional.broadcast(); } /// Wait on the barrier until numthreads has called wait inline void wait() const { if (!alive) return; mutex.lock(); // set waiting; called++; bool listening_on = barrier_sense; if (called == needed) { // if I have reached the required limit, wait up. Set waiting // to 0 to make sure everyone wakes up called = 0; barrier_release = barrier_sense; barrier_sense = !barrier_sense; // clear all waiting conditional.broadcast(); } else { // while no one has broadcasted, sleep while(barrier_release != listening_on && alive) conditional.wait(mutex); } mutex.unlock(); } }; // end of conditional /** * \class barrier * Wrapper around pthread's barrier * * Before you use, see \ref parallel_object_intricacies. 
 */
#ifdef __linux__
/**
 * \ingroup util
 * Wrapper around pthread's barrier
 */
class barrier {
 private:
  mutable pthread_barrier_t m_barrier;
  // not copyconstructable
  barrier(const barrier&) { }
 public:
  /// Construct a barrier which will only fall when numthreads enter
  barrier(size_t numthreads) {
    pthread_barrier_init(&m_barrier, NULL, (unsigned)numthreads);
  }
  // not copyable
  void operator=(const barrier& m) { }
  // Re-creates the underlying barrier for a different thread count.
  // NOTE(review): unsafe if any thread is currently waiting (hence the name).
  void resize_unsafe(size_t numthreads) {
    pthread_barrier_destroy(&m_barrier);
    pthread_barrier_init(&m_barrier, NULL, (unsigned)numthreads);
  }
  ~barrier() {
    pthread_barrier_destroy(&m_barrier);
  }
  /// Wait on the barrier until numthreads has called wait
  inline void wait() const {
    pthread_barrier_wait(&m_barrier);
  }
};
#else
/* In some systems, pthread_barrier is not available. */
typedef cancellable_barrier barrier;
#endif

// Issues a read prefetch for [addr, addr+len), one request every 64 bytes
// (presumed cache-line size -- TODO confirm for non-x86 targets).
inline void prefetch_range(void *addr, size_t len) {
  char *cp;
  char *end = (char*)(addr) + len;
  for (cp = (char*)(addr); cp < end; cp += 64) __builtin_prefetch(cp, 0);
}

// Same as prefetch_range(), but the second argument (1) hints write intent.
inline void prefetch_range_write(void *addr, size_t len) {
  char *cp;
  char *end = (char*)(addr) + len;
  for (cp = (char*)(addr); cp < end; cp += 64) __builtin_prefetch(cp, 1);
}

/**
 * \ingroup util
 * A collection of routines for creating and managing threads.
 *
 * The thread object performs limited exception forwarding.
 * exception throws within a thread of type const char* will be caught
 * and forwarded to the join() function.
 * If the call to join() is wrapped by a try-catch block, the exception
 * will be caught safely and thread cleanup will be completed properly.
 */
class thread {
 public:

  /**
   * This class contains the data unique to each thread. All threads
   * are guaranteed to have an associated graphlab thread_specific
   * data. The thread object is copyable.
*/ class tls_data { public: inline tls_data(size_t thread_id) : thread_id_(thread_id) { } inline size_t thread_id() { return thread_id_; } inline void set_thread_id(size_t t) { thread_id_ = t; } any& operator[](const size_t& id) { return local_data[id]; } bool contains(const size_t& id) const { return local_data.find(id) != local_data.end(); } size_t erase(const size_t& id) { return local_data.erase(id); } private: size_t thread_id_; boost::unordered_map<size_t, any> local_data; }; // end of thread specific data /// Static helper routines // =============================================================== /** * Get the thread specific data associated with this thread */ static tls_data& get_tls_data(); /** Get the id of the calling thread. This will typically be the index in the thread group. Between 0 to ncpus. */ static inline size_t thread_id() { return get_tls_data().thread_id(); } /** Set the id of the calling thread. This will typically be the index in the thread group. Between 0 to ncpus. */ static inline void set_thread_id(size_t t) { get_tls_data().set_thread_id(t); } /** * Get a reference to an any object */ static inline any& get_local(const size_t& id) { return get_tls_data()[id]; } /** * Check to see if there is an entry in the local map */ static inline bool contains(const size_t& id) { return get_tls_data().contains(id); } /** * Removes the entry from the local map. * @return number of elements erased. */ static inline size_t erase(const size_t& id){ return get_tls_data().erase(id); } /** * This static method joins the invoking thread with the other * thread object. This thread will not return from the join * routine until the other thread complets it run. */ static void join(thread& other); // Called just before thread exits. Can be used // to do special cleanup... 
(need for Java JNI) static void thread_destroy_callback(); static void set_thread_destroy_callback(void (*callback)()); /** * Return the number processing units (individual cores) on this * system */ static size_t cpu_count(); private: struct invoke_args{ size_t m_thread_id; boost::function<void(void)> spawn_routine; invoke_args(size_t m_thread_id, const boost::function<void(void)> &spawn_routine) : m_thread_id(m_thread_id), spawn_routine(spawn_routine) { }; }; //! Little helper function used to launch threads static void* invoke(void *_args); public: /** * Creates a thread with a user-defined associated thread ID */ inline thread(size_t thread_id = 0) : m_stack_size(0), m_p_thread(0), m_thread_id(thread_id), thread_started(false){ // Calculate the stack size in in bytes; const int BYTES_PER_MB = 1048576; const int DEFAULT_SIZE_IN_MB = 8; m_stack_size = DEFAULT_SIZE_IN_MB * BYTES_PER_MB; } /** * execute this function to spawn a new thread running spawn_function * routine */ void launch(const boost::function<void (void)> &spawn_routine); /** * Same as launch() except that you can specify a CPU on which to * run the thread. This only currently supported in Linux and if * invoked on a non Linux based system this will be equivalent to * start(). */ void launch(const boost::function<void (void)> &spawn_routine, size_t cpu_id); /** * Join the calling thread with this thread. * const char* exceptions * thrown by the thread is forwarded to the join() function. */ inline void join() { if(this == NULL) { std::cout << "Failure on join()" << std::endl; exit(EXIT_FAILURE); } join(*this); } /// Returns true if the thread is still running inline bool active() const { return thread_started; } inline ~thread() { } /// Returns the pthread thread id inline pthread_t pthreadid() { return m_p_thread; } private: //! The size of the internal stack for this thread size_t m_stack_size; //! The internal pthread object pthread_t m_p_thread; //! 
the threads id size_t m_thread_id; bool thread_started; }; // End of class thread /** * \ingroup util * Manages a collection of threads. * * The thread_group object performs limited exception forwarding. * exception throws within a thread of type const char* will be caught * and forwarded to the join() function. * If the call to join() is wrapped by a try-catch block, the exception * will be caught safely and thread cleanup will be completed properly. * * If multiple threads are running in the thread-group, the master should * test if running_threads() is > 0, and retry the join(). */ class thread_group { private: size_t m_thread_counter; size_t threads_running; mutex mut; conditional cond; std::queue<std::pair<pthread_t, const char*> > joinqueue; // not implemented thread_group& operator=(const thread_group &thrgrp); thread_group(const thread_group&); static void invoke(boost::function<void (void)> spawn_function, thread_group *group); public: /** * Initializes a thread group. */ thread_group() : m_thread_counter(0), threads_running(0) { } /** * Launch a single thread which calls spawn_function No CPU affinity is * set so which core it runs on is up to the OS Scheduler */ void launch(const boost::function<void (void)> &spawn_function); /** * Launch a single thread which calls spawn_function Also sets CPU * Affinity */ void launch(const boost::function<void (void)> &spawn_function, size_t cpu_id); /** Waits for all threads to complete execution. const char* exceptions thrown by threads are forwarded to the join() function. */ void join(); /// Returns the number of running threads. inline size_t running_threads() { return threads_running; } //! Destructor. Waits for all threads to complete execution inline ~thread_group(){ join(); } }; // End of thread group /// Runs f in a new thread. convenience function for creating a new thread quickly. 
// Spawns a new thread running f. size_t(-1) for cpuid means "no affinity".
inline thread launch_in_new_thread(const boost::function<void (void)> &f,
                                   size_t cpuid = size_t(-1)) {
  thread thr;
  if (cpuid != size_t(-1)) thr.launch(f, cpuid);
  else thr.launch(f);
  return thr;
}

/// an integer value padded to 64 bytes
struct padded_integer {
  size_t val;
  char __pad__[64 - sizeof(size_t)];
};

}; // End Namespace

#endif


================================================
FILE: src/graphlab/parallel/queued_rwlock.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef QUEUED_RWLOCK_HPP
#define QUEUED_RWLOCK_HPP

namespace graphlab {

// Lock-request classes carried in request::lockclass.
#define QUEUED_RW_LOCK_REQUEST_READ 0
#define QUEUED_RW_LOCK_REQUEST_WRITE 1
#define QUEUED_RW_LOCK_REQUEST_NONE 2

/**
 * Fair rw-lock with local-only spinning implemented and
 * modified from
 * Scalable Reader-Writer Synchronization for Shared-Memory Multiprocessors.
 * John M. Mellor-Crummey and Michael L.
Scott */
class queued_rw_lock{
 public:

  union state_union {
    volatile uint32_t stateu;
    struct {
      volatile uint16_t successor_class;
      volatile bool blocked;
    } state;
  };

  // One queue node per acquisition attempt; the caller owns the node for
  // the duration of the lock/unlock pair.
  struct request{
    void* id;
    volatile request* volatile next;
    volatile state_union s;
    volatile char lockclass;
  };
 private:
  request* volatile tail;
  atomic<size_t> reader_count;
  request* volatile next_writer;
 public:
  queued_rw_lock(): tail(NULL), reader_count(0), next_writer(NULL) { }

  inline void writelock(request *I) {
    I->lockclass = QUEUED_RW_LOCK_REQUEST_WRITE;
    I->next = NULL;
    I->s.stateu = 0;
    I->s.state.blocked = true;
    I->s.state.successor_class = QUEUED_RW_LOCK_REQUEST_NONE;
    __sync_synchronize();
    // swap ourselves in as the new queue tail
    request* predecessor = __sync_lock_test_and_set(&tail, I);

    if (predecessor == NULL) {
      next_writer = I;
      __sync_synchronize();
      if (reader_count.value == 0) {
        if (__sync_lock_test_and_set(&next_writer, (request*)NULL) == I) {
          I->s.state.blocked = false;
        }
      }
    }
    else {
      predecessor->s.state.successor_class = QUEUED_RW_LOCK_REQUEST_WRITE;
      __sync_synchronize();
      predecessor->next = I;
    }
    // while I->blocked. continue
    volatile state_union& is = I->s;
    while (is.state.blocked) sched_yield();
    assert(reader_count.value == 0);
  }

  inline void wrunlock(request *I) {
    __sync_synchronize();
    if (I->next != NULL ||
        !__sync_bool_compare_and_swap(&tail, I, (request*)NULL)) {
      // wait
      while(I->next == NULL) sched_yield();
      __sync_synchronize();

      if (I->next->lockclass == QUEUED_RW_LOCK_REQUEST_READ) {
        reader_count.inc();
      }
      I->next->s.state.blocked = false;
    }
  }

  inline void readlock(request *I) {
    I->lockclass =QUEUED_RW_LOCK_REQUEST_READ;
    I->next = NULL;
    I->s.stateu = 0;
    I->s.state.successor_class = QUEUED_RW_LOCK_REQUEST_NONE;
    I->s.state.blocked = true;
    __sync_synchronize();
    request* predecessor = __sync_lock_test_and_set(&tail, I);
    if (predecessor == NULL) {
      reader_count.inc();
      I->s.state.blocked = false;
    }
    else {
      state_union tempold, tempnew;
      tempold.state.blocked = true;
      tempold.state.successor_class = QUEUED_RW_LOCK_REQUEST_NONE;
      tempnew.state.blocked = true;
      tempnew.state.successor_class = QUEUED_RW_LOCK_REQUEST_READ;
      __sync_synchronize();
      if (predecessor->lockclass == QUEUED_RW_LOCK_REQUEST_WRITE ||
          atomic_compare_and_swap(predecessor->s.stateu,
                                  tempold.stateu,
                                  tempnew.stateu)) {
        predecessor->next = I;
        // wait
        __sync_synchronize();
        volatile state_union& is = I->s;
        while(is.state.blocked) sched_yield();
      }
      else {
        reader_count.inc();
        predecessor->next = I;
        __sync_synchronize();
        I->s.state.blocked = false;
      }
    }
    __sync_synchronize();
    // release any reader queued directly behind us (reader batching)
    if (I->s.state.successor_class == QUEUED_RW_LOCK_REQUEST_READ) {
      // wait
      while(I->next == NULL) sched_yield();
      reader_count.inc();
      I->next->s.state.blocked = false;
    }
  }

  inline void rdunlock(request *I) {
    __sync_synchronize();
    if (I->next != NULL ||
        !__sync_bool_compare_and_swap(&tail, I, (request*)NULL)) {
      while(I->next == NULL) sched_yield();
      if (I->s.state.successor_class == QUEUED_RW_LOCK_REQUEST_WRITE) {
        next_writer = (request*)(I->next);
        __sync_synchronize();
      }
    }
    // last reader out hands the lock to the waiting writer, if any
    if (reader_count.dec() == 0) {
      __sync_synchronize();
      request * w = __sync_lock_test_and_set(&next_writer, (request*)NULL);
      if (w != NULL) {
        w->s.state.blocked = false;
        __sync_synchronize();
      }
    }
  }
};

}
#endif


================================================
FILE: src/graphlab/parallel/thread_pool.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#include <graphlab/parallel/thread_pool.hpp>
#include <graphlab/logger/assertions.hpp>

namespace graphlab {

thread_pool::thread_pool(size_t nthreads, bool affinity) {
  waiting_on_join = false;
  tasks_inserted = 0;
  tasks_completed = 0;
  cpu_affinity = affinity;
  pool_size = nthreads;
  spawn_thread_group();
} // end of thread_pool

void thread_pool::resize(size_t nthreads) {
  // if the current pool size does not equal the requested number of
  // threads shut the pool down and startup with correct number of
  // threads. \todo: If the pool size is too small just add
  // additional threads rather than destroying the pool
  if(nthreads != pool_size) {
    pool_size = nthreads;
    // stop the queue from blocking
    spawn_queue.stop_blocking();
    // join the threads in the thread group
    while(true) {
      try {
        threads.join(); break;
      } catch (const char* error_str) {
        // this should not be possible!
        logstream(LOG_FATAL)
          << "Unexpected exception caught in thread pool destructor: "
          << error_str << std::endl;
      }
    }
    spawn_queue.start_blocking();
    spawn_thread_group();
  }
} // end of set_nthreads

size_t thread_pool::size() const {
  return pool_size;
}

/** Creates the thread group */
void thread_pool::spawn_thread_group() {
  size_t ncpus = thread::cpu_count();
  // start all the threads if CPU affinity is set
  for (size_t i = 0;i < pool_size; ++i) {
    if (cpu_affinity) {
      threads.launch(boost::bind(&thread_pool::wait_for_task, this), i % ncpus);
    }
    else {
      threads.launch(boost::bind(&thread_pool::wait_for_task, this));
    }
  }
} // end of spawn_thread_group

void thread_pool::destroy_all_threads() {
  // wait for all execution to complete
  spawn_queue.wait_until_empty();
  // kill the queue
  spawn_queue.stop_blocking();

  // join the threads in the thread group
  while(1) {
    try {
      threads.join(); break;
    } catch (const char* c) {
      // this should not be possible!
      logstream(LOG_FATAL)
        << "Unexpected exception caught in thread pool destructor: "
        << c << std::endl;
      ASSERT_TRUE(false);
    }
  }
} // end of destroy_all_threads

void thread_pool::set_cpu_affinity(bool affinity) {
  if (affinity != cpu_affinity) {
    cpu_affinity = affinity;
    // stop the queue from blocking
    spawn_queue.stop_blocking();

    // join the threads in the thread group
    while(1) {
      try {
        threads.join(); break;
      } catch (const char* c) {
        // this should not be possible!
        logstream(LOG_FATAL)
          << "Unexpected exception caught in thread pool destructor: "
          << c << std::endl;
        // ASSERT_TRUE(false); // unnecessary
      }
    }
    spawn_queue.start_blocking();
    spawn_thread_group();
  }
} // end of set_cpu_affinity

void thread_pool::wait_for_task() {
  while(1) {
    std::pair<std::pair<boost::function<void (void)>, int>, bool> queue_entry;
    // pop from the queue
    queue_entry = spawn_queue.dequeue();
    if (queue_entry.second) {
      // try to run the function. remember to put it in a try catch
      try {
        int virtual_thread_id = queue_entry.first.second;
        size_t cur_thread_id = thread::thread_id();
        // temporarily impersonate the requested virtual thread id
        if (virtual_thread_id != -1) {
          thread::set_thread_id(virtual_thread_id);
        }
        queue_entry.first.first();
        thread::set_thread_id(cur_thread_id);
      } catch(const char* ex) {
        // if an exception was raised, put it in the exception queue
        mut.lock();
        exception_queue.push(ex);
        event_condition.signal();
        mut.unlock();
      }

      mut.lock();
      tasks_completed++;
      // the waiting on join flag just prevents me from
      // signaling every time completed == inserted. Which could be very
      // very often
      if (waiting_on_join &&
          tasks_completed == tasks_inserted) event_condition.signal();
      mut.unlock();
    }
    else {
      // quit if the queue is dead
      break;
    }
  }
} // end of wait_for_task

void thread_pool::launch(const boost::function<void (void)> &spawn_function,
                         int thread_id) {
  mut.lock();
  tasks_inserted++;
  spawn_queue.enqueue(std::make_pair(spawn_function, thread_id));
  mut.unlock();
}

void thread_pool::join() {
  // NOTE(review): `eventret` is never used in this function and could be
  // removed.
  std::pair<bool, bool> eventret;
  // first we wait for the queue to empty
  spawn_queue.wait_until_empty();

  mut.lock();
  waiting_on_join = true;
  while(1) {
    // check the exception queue.
    if (!exception_queue.empty()) {
      // pop an exception
      const char* ex = exception_queue.front();
      exception_queue.pop();
      // unlock and throw the event
      waiting_on_join = false;
      mut.unlock();
      throw(ex);
    }
    // nothing to throw, check if all tasks were completed
    if (tasks_completed == tasks_inserted) {
      // yup
      break;
    }
    event_condition.wait(mut);
  }
  waiting_on_join = false;
  mut.unlock();
}

thread_pool::~thread_pool() {
  destroy_all_threads();
}

}


================================================
FILE: src/graphlab/parallel/thread_pool.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_THREAD_POOL_HPP
#define GRAPHLAB_THREAD_POOL_HPP

#include <boost/bind.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/util/blocking_queue.hpp>

namespace graphlab {

/**
 * \ingroup util
 * Manages a pool of threads.
 *
 * The interface is nearly identical to the \ref thread_group.
 * The key difference is internal behavior. The thread pool preallocates a
 * collection of threads which it keeps asleep. When tasks are issued
 * through the "launch" function, threads are woken up to perform the
 * tasks.
 *
 * The thread_pool object performs limited exception forwarding.
 * exception throws within a thread of type const char* will be caught
 * and forwarded to the join() function.
 * If the call to join() is wrapped by a try-catch block, the exception
 * will be caught safely and thread cleanup will be completed properly.
 *
 * If multiple threads are running in the thread-group, the master should
 * test if running_threads() is > 0, and retry the join().
 */
class thread_pool {
 private:
  thread_group threads;
  // each entry is (task function, virtual thread id or -1)
  blocking_queue<std::pair<boost::function<void (void)>, int> > spawn_queue;
  size_t pool_size;

  // protects the exception queue, and the task counters
  mutex mut;
  conditional event_condition;  // to wake up the joining thread
  std::queue<const char*> exception_queue;
  size_t tasks_inserted;
  size_t tasks_completed;
  bool waiting_on_join;  // true if a thread is waiting in join

  bool cpu_affinity;
  // not implemented
  thread_pool& operator=(const thread_pool &thrgrp);
  thread_pool(const thread_pool&);

  /** Called by each thread. Loops around a queue of tasks. */
  void wait_for_task();

  /** Creates all the threads in the thread pool.
      Resets the task and exception queue */
  void spawn_thread_group();

  /** Destroys the thread pool.
      Also destroys the task queue */
  void destroy_all_threads();
 public:

  /* Initializes a thread pool with nthreads.
   * If affinity is set, the nthreads will by default stripe across
   * the available cores on the system.
   */
  thread_pool(size_t nthreads = 2, bool affinity = false);

  /**
   * Set the number of threads in the queue
   */
  void resize(size_t nthreads);

  /**
   * Get the number of threads
   */
  size_t size() const;

  /**
   * Changes the CPU affinity. Note that pthread does not provide
   * a way to change CPU affinity on a currently started thread.
   * This function therefore waits for all threads in the pool
   * to finish their current task, and destroy all the threads. Then
   * new threads are created with the new affinity setting.
   */
  void set_cpu_affinity(bool affinity);

  /** Gets the CPU affinity. */
  bool get_cpu_affinity() { return cpu_affinity; };

  /**
   * Launch a single thread which calls spawn_function. If affinity
   * is set on construction of the thread_pool, the thread handling the
   * function will be locked on to one particular CPU.
   *
   * If virtual_threadid is set, the target thread will appear to have
   * thread ID equal to the requested thread ID
   */
  void launch(const boost::function<void (void)> &spawn_function,
              int virtual_threadid = -1);

  /** Waits for all threads to become free. const char* exceptions
      thrown by threads are forwarded to the join() function.
      Once this function returns normally, the queue is empty.

      Note that this function may not return if producers continually
      insert tasks through launch.
  */
  void join();

  //! Destructor. Cleans up all threads
  ~thread_pool();
};

}
#endif


================================================
FILE: src/graphlab/rpc/CMakeLists.txt
================================================
project(GraphLab)


================================================
FILE: src/graphlab/rpc/async_consensus.cpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#include <graphlab/rpc/async_consensus.hpp>

namespace graphlab {

async_consensus::async_consensus(distributed_control &dc,
                                 size_t required_threads_in_done,
                                 const dc_impl::dc_dist_object_base *attach)
  :rmi(dc, this), attachedobj(attach),
   last_calls_sent(0), last_calls_received(0),
   numactive(required_threads_in_done),
   ncpus(required_threads_in_done),
   done(false), trying_to_sleep(0),
   critical(ncpus, 0), sleeping(ncpus, 0),
   hastoken(dc.procid() == 0), cond(ncpus){
  // Initialize the token; machine 0 starts holding it, and last_change is
  // set so a full unchanged circuit can be detected.
  cur_token.total_calls_sent = 0;
  cur_token.total_calls_received = 0;
  cur_token.last_change = (procid_t)(rmi.numprocs() - 1);
}

void async_consensus::reset() {
  last_calls_sent = 0;
  last_calls_received = 0;
  numactive = ncpus;
  done = false;
  // NOTE(review): assigns 0 to an atomic counter via `false`; functional,
  // but `trying_to_sleep = 0` would be clearer.
  trying_to_sleep = false;
  critical = std::vector<char>(ncpus, 0);
  sleeping = std::vector<char>(ncpus, 0);
  hastoken = (rmi.procid() == 0);
  cur_token.total_calls_sent = 0;
  cur_token.total_calls_received = 0;
  cur_token.last_change = (procid_t)(rmi.numprocs() - 1);
}

void async_consensus::force_done() {
  m.lock();
  done = true;
  m.unlock();
  cancel();
}

void async_consensus::begin_done_critical_section(size_t cpuid) {
  // The mutex is intentionally left LOCKED on return; it is released by
  // either cancel_critical_section() or end_done_critical_section().
  trying_to_sleep.inc();
  critical[cpuid] = true;
  m.lock();
}

void async_consensus::cancel_critical_section(size_t cpuid) {
  m.unlock();
  critical[cpuid] = false;
  trying_to_sleep.dec();
}

bool async_consensus::end_done_critical_section(size_t cpuid) {
  // if done flag is set, quit immediately
  if (done) {
    m.unlock();
    critical[cpuid] = false;
    trying_to_sleep.dec();
    return true;
  }
  /*
    Assertion: Since numactive is decremented only within
    a critical section, and is incremented only within the same critical
    section. Therefore numactive is a valid counter of the number of
    threads outside of this critical section.
  */
  --numactive;

  /*
    Assertion: If numactive is ever 0 at this point, the algorithm is
    done. WLOG, let the current thread which just decremented numactive be
    thread 0. Since there is only 1 active thread (0), there must be no
    threads performing insertions, and are no other threads which are
    waking up. All threads must therefore be sleeping in cond.wait().
  */
  if (numactive == 0) {
    logstream(LOG_INFO) << rmi.procid() << ": Termination Possible" << std::endl;
    if (hastoken) pass_the_token();
  }
  sleeping[cpuid] = true;
  while(1) {
    // here we are protected by the mutex again.
    // woken up by someone else. leave the
    // terminator
    if (sleeping[cpuid] == false || done) {
      break;
    }
    cond[cpuid].wait(m);
  }
  m.unlock();
  critical[cpuid] = false;
  trying_to_sleep.dec();
  return done;
}

void async_consensus::cancel() {
  /*
    Assertion: numactive > 0 if there is work to do.
    If there are threads trying to sleep, lets wake them up
  */
  if (trying_to_sleep > 0 || numactive < ncpus) {
    m.lock();
    size_t oldnumactive = numactive;
    // once I acquire this lock, all threads must be
    // in the following states
    // 1: still running and has not reached begin_critical_section()
    // 2: is sleeping in cond.wait()
    // 3: has called begin_critical_section() but has not acquired
    //    the mutex
    // In the case of 1,3: These threads will perform one more sweep
    // of their task queues. Therefore they will see any new job if available
    // in the case of 2: numactive must be < ncpus since numactive
    // is mutex protected. Then I can wake them up by
    // clearing their sleeping flags and broadcasting.
    if (numactive < ncpus) {
      // this is safe. Note that it is done from within
      // the critical section.
      for (size_t i = 0;i < ncpus; ++i) {
        numactive += sleeping[i];
        if (sleeping[i]) {
          sleeping[i] = 0;
          cond[i].signal();
        }
      }
      if (oldnumactive == 0 && !done) {
        logstream(LOG_INFO) << rmi.procid() << ": Waking" << std::endl;
      }
    }
    m.unlock();
  }
}

void async_consensus::cancel_one(size_t cpuhint) {
  if (critical[cpuhint]) {
    m.lock();
    size_t oldnumactive = numactive;
    // see new_job() for detailed comments
    if (sleeping[cpuhint]) {
      numactive += sleeping[cpuhint];
      sleeping[cpuhint] = 0;
      if (oldnumactive == 0 && !done) {
        logstream(LOG_INFO) << rmi.procid() << ": Waking" << std::endl;
      }
      cond[cpuhint].signal();
    }
    m.unlock();
  }
}

void async_consensus::receive_the_token(token &tok) {
  m.lock();
  // save the token
  hastoken = true;
  cur_token = tok;
  // if I am waiting on done, pass the token.
  logstream(LOG_INFO) << rmi.procid() << ": Token Received" << std::endl;
  if (numactive == 0) {
    pass_the_token();
  }
  m.unlock();
}

void async_consensus::pass_the_token() {
  // note that this function does not acquire the token lock
  // the caller must acquire it
  assert(hastoken);
  // first check if we are done: the token made a full circuit with no
  // change and all in-flight calls have been received (Misra 1983).
  if (cur_token.last_change == rmi.procid() &&
      cur_token.total_calls_received == cur_token.total_calls_sent) {
    logstream(LOG_INFO) << "Completed Token: "
                        << cur_token.total_calls_received << " "
                        << cur_token.total_calls_sent << std::endl;
    // we have completed a loop around!
    // broadcast a completion
    for (procid_t i = 0;i < rmi.numprocs(); ++i) {
      if (i != rmi.procid()) {
        rmi.control_call(i, &async_consensus::force_done);
      }
    }
    // set the complete flag
    // we can't call consensus() since it will deadlock
    done = true;
    // this is the same code as cancel(), but we can't call cancel
    // since we are holding on to a lock
    if (numactive < ncpus) {
      // this is safe. Note that it is done from within
      // the critical section.
      for (size_t i = 0;i < ncpus; ++i) {
        numactive += sleeping[i];
        if (sleeping[i]) {
          sleeping[i] = 0;
          cond[i].signal();
        }
      }
    }
  }
  else {
    // update the token
    size_t callsrecv;
    size_t callssent;

    if (attachedobj) {
      callsrecv = attachedobj->calls_received();
      callssent = attachedobj->calls_sent();
    }
    else {
      callsrecv = rmi.dc().calls_received();
      callssent = rmi.dc().calls_sent();
    }

    // fold in only the delta since we last saw the token
    if (callssent != last_calls_sent ||
        callsrecv != last_calls_received) {
      cur_token.total_calls_sent += callssent - last_calls_sent;
      cur_token.total_calls_received += callsrecv - last_calls_received;
      cur_token.last_change = rmi.procid();
    }
    //std::cout << "Sending token: (" << cur_token.total_calls_sent
    //<< ", " << cur_token.total_calls_received << ")" << std::endl;
    last_calls_sent = callssent;
    last_calls_received = callsrecv;
    // send it along.
    hastoken = false;
    logstream(LOG_INFO) << "Passing Token " << rmi.procid() << "-->"
                        << (rmi.procid() + 1) % rmi.numprocs() << ": "
                        << cur_token.total_calls_received << " "
                        << cur_token.total_calls_sent << std::endl;
    rmi.control_call((procid_t)((rmi.procid() + 1) % rmi.numprocs()),
                     &async_consensus::receive_the_token,
                     cur_token);
  }
}

}


================================================
FILE: src/graphlab/rpc/async_consensus.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef ASYNC_TERMINATOR_HPP #define ASYNC_TERMINATOR_HPP #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/rpc/dc.hpp> #include <graphlab/rpc/dc_dist_object_base.hpp> #include <graphlab/rpc/dc_dist_object.hpp> namespace graphlab { /** * \ingroup rpc * \brief This implements a distributed consensus algorithm which waits * for global completion of all computation/RPC events on a given object. * * The use case is as follows: * * A collection of threads on a collection of distributed machines, each * running the following * * \code * while (work to be done) { * Do_stuff * } * \endcode * * However, <tt>Do_stuff</tt> will include RPC calls which may introduce * work to other threads/machines. * Figuring out when termination is possible is complex. For instance RPC calls * could be in-flight and not yet received. This async_consensus class * implements a solution built around the algorithm in * <i>Misra, J.: Detecting Termination of Distributed Computations Using Markers, SIGOPS, 1983</i> * extended to handle the mixed parallelism (distributed with threading) case. * * The main loop of the user has to be modified to: * * \code * done = false; * while (!done) { * Do_stuff * // if locally, I see that there is no work to be done * // we begin the consensus checking * if (no work to be done) { * // begin the critical section and try again * consensus.begin_done_critical_section(); * // if still no work to be done * if (no work to be done) { * done = consensus.end_done_critical_section(); * } * else { * consensus.cancel_critical_section(); * } * } * } * * \endcode * * Additionally, incoming RPC calls which create work must ensure there are * active threads which are capable of processing the work. An easy solution * will be to simply cancel_one(). 
Other more optimized solutions
 * include keeping a counter of the number of active threads, and only calling
 * cancel() or cancel_one() if all threads are asleep. (Note that the optimized
 * solution does require some care to ensure dead-lock free execution).
 *
 * The async_consensus works with regular kernel threads. See
 * \ref graphlab::fiber_async_consensus for a version which works with
 * fibers.
 *
 * \see graphlab::fiber_async_consensus
 */
class async_consensus {
 public:
  /** \brief Constructs an asynchronous consensus object
   *
   * The consensus procedure waits till all threads have no work to do and are
   * waiting in consensus, and there is no communication going on which
   * could wake up a thread. The consensus object is therefore associated
   * with a communication context, either a graphlab::dc_dist_object,
   * or the global context (the root distributed_control).
   *
   * \param dc The distributed control object to use for communication
   * \param required_threads_in_done local consensus is achieved if this many
   *        threads are waiting for consensus locally.
   * \param attach The context to associate with. If NULL, we associate with
   *        the global context.
   */
  async_consensus(distributed_control &dc,
                  size_t required_threads_in_done = 1,
                  const dc_impl::dc_dist_object_base* attach = NULL);

  /**
   * \brief A thread enters the critical section by calling
   * this function.
   *
   * After which it should check its termination
   * condition locally. If the termination condition
   * is still fulfilled, end_done_critical_section() should be called.
   * Otherwise cancel_critical_section() should be called.
   *
   * \param cpuid Thread ID of the thread.
   */
  void begin_done_critical_section(size_t cpuid);

  /**
   * \brief Leaves the critical section because the termination condition
   * is not fulfilled.
   *
   * See begin_done_critical_section()
   * \param cpuid Thread ID of the thread.
   */
  void cancel_critical_section(size_t cpuid);

  /**
   * \brief Thread must call this function if the termination condition
   * is fulfilled while in the critical section.
   *
   * See begin_done_critical_section()
   *
   * \param cpuid Thread ID of the thread.
   * \returns True if global consensus is achieved. False otherwise.
   */
  bool end_done_critical_section(size_t cpuid);

  /**
   * \brief Forces the consensus to be set
   */
  void force_done();

  /// \brief Wakes up all local threads waiting in done()
  void cancel();

  /// \brief Wakes up a specific thread waiting in done()
  void cancel_one(size_t cpuid);

  /// \brief Returns true if consensus is achieved.
  bool is_done() const {
    return done;
  }

  /** \brief Resets the consensus object. Must be called simultaneously by
   * exactly one thread on each machine.
   * This function is not safe to call while consensus is being achieved.
   */
  void reset();

 private:
  /**
   * The token object that is passed around the machines.
   * It counts the total number of RPC calls that has been sent
   * or received, as well as the machine which last changed the value.
   * When the token goes one full round with no change, consensus is
   * achieved.
   */
  struct token {
    size_t total_calls_sent;      // running total of RPC calls sent, summed across machines
    size_t total_calls_received;  // running total of RPC calls received, summed across machines
    procid_t last_change;         // the machine which last modified the counters
    // serializer: writes the three fields in declaration order
    void save(oarchive &oarc) const {
      oarc << total_calls_sent << total_calls_received << last_change;
    }
    // deserializer: must mirror save() exactly
    void load(iarchive &iarc) {
      iarc >> total_calls_sent >> total_calls_received >> last_change;
    }
  };

  // RPC context used to pass the token between machines
  dc_dist_object<async_consensus> rmi;
  // the communication context being monitored; NULL means the global context
  const dc_impl::dc_dist_object_base* attachedobj;

  // call counters observed the last time the token was forwarded;
  // used to detect whether any communication happened since then
  size_t last_calls_sent;
  size_t last_calls_received;

  /// counts the number of threads which are not sleeping
  /// protected by the mutex
  size_t numactive;
  /// Total number of CPUs
  size_t ncpus;
  /// once flag is set, the terminator is invalid, and all threads
  /// should leave
  bool done;

  /// set if abort() is called
  // bool forced_abort;

  /// Number of threads which have called
  /// begin_critical_section(), and have not left end_critical_section()
  /// This is an atomic counter and is not protected.
  atomic<size_t> trying_to_sleep;
  /// critical[i] is set if thread i has called
  /// begin_critical_section(), but has not left end_critical_section()
  /// sum of critical should be the same as trying_to_sleep
  std::vector<char> critical;
  /// sleeping[i] is set if threads[i] is in cond.wait()
  std::vector<char> sleeping;

  // true while this machine holds the termination token
  bool hastoken;
  /// If I have the token, the value of the token
  token cur_token;

  // protects numactive, critical, sleeping and the condition variables
  mutex m;
  // one condition variable per thread so threads can be woken individually
  std::vector<conditional> cond;

  // RPC target: invoked when the previous machine forwards the token here
  void receive_the_token(token &tok);
  // folds local call counts into the token and forwards it to the next
  // machine in ring order
  void pass_the_token();
};
}
#endif

================================================ FILE: src/graphlab/rpc/buffered_exchange.hpp ================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_BUFFERED_EXCHANGE_HPP #define GRAPHLAB_BUFFERED_EXCHANGE_HPP #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/parallel/fiber_control.hpp> #include <graphlab/rpc/dc.hpp> #include <graphlab/rpc/dc_dist_object.hpp> #include <graphlab/util/mpi_tools.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { /** * \ingroup rpc * * The buffered exchange provides high performance exchange of bulk data * between machines. The basic usage is simple: * * For instance, if we are doing bulk exchanges of integers: * \code * buffered_exchange<int> exchange(dc, numthreads); * .. In parallel .. { * exchange.send([target machine], [value to send to target], [thread id]) * exchange.partial_flush([thread id]); * } * * .. now in 1 thread .. * exchange.flush() * * .. In parallel .. { * procid_t proc; * buffered_exchange<int>::buffer_type buffer; // (an array of integers) * while(exchange.recv(proc, buffer)) { * process array of integers (buffeer) which were sent by proc * } * } * \endcode * * \note The buffered exchange sends data in the background, so recv can be * called even before the flush calls. 
*
* \see graphlab::fiber_buffered_exchange
*/
template<typename T>
class buffered_exchange {
 public:
  /// The type of one received batch: a flat array of T
  typedef std::vector<T> buffer_type;

 private:
  // one completed incoming batch: the sending process and its values
  struct buffer_record {
    procid_t proc;
    buffer_type buffer;
    buffer_record() : proc(-1) { }
  }; // end of buffer record

  /** The rpc interface for this class */
  mutable dc_dist_object<buffered_exchange> rpc;

  // completed incoming batches, consumed FIFO by recv()
  std::deque< buffer_record > recv_buffers;
  // protects recv_buffers
  mutex recv_lock;

  // one in-progress outgoing split-call archive plus the number of
  // elements serialized into it so far
  struct send_record {
    oarchive* oarc;
    size_t numinserts;
  };

  // indexed by (thread_id * numprocs + target proc); each slot has its own lock
  std::vector<send_record> send_buffers;
  std::vector< mutex > send_locks;
  const size_t num_threads;
  const size_t max_buffer_size;

  // typedef boost::function<void (const T& tref)> handler_type;
  // handler_type recv_handler;

 public:
  /**
   * Constructs a buffered exchange object.
   *
   * \ref dc The master distributed_control object
   * \ref num_threads The number of threads to support. This is essentially
   *      the number of fine-grained locks to use. This does not
   *      need to match the total number of threads used during
   *      the exchange process, but there are performance / contention
   *      advantages if this matches.
   * \ref max_buffer_size The size of the per thread and per target send buffer.
   */
  buffered_exchange(distributed_control& dc,
                    const size_t num_threads = 1,
                    const size_t max_buffer_size = DEFAULT_BUFFERED_EXCHANGE_SIZE) :
      rpc(dc, this),
      send_buffers(num_threads * dc.numprocs()),
      send_locks(num_threads * dc.numprocs()),
      num_threads(num_threads),
      max_buffer_size(max_buffer_size) {
    //
    for (size_t i = 0;i < send_buffers.size(); ++i) {
      // initialize the split call
      send_buffers[i].oarc = rpc.split_call_begin(&buffered_exchange::rpc_recv);
      send_buffers[i].numinserts = 0;
      // begin by writing the src proc.
      (*(send_buffers[i].oarc)) << rpc.procid();
    }
    rpc.barrier();
  }

  ~buffered_exchange() {
    // clear the send buffers: cancel every split call that was begun but
    // never completed, releasing its archive
    for (size_t i = 0;i < send_buffers.size(); ++i) {
      rpc.split_call_cancel(send_buffers[i].oarc);
    }
  }

  // buffered_exchange(distributed_control& dc, handler_type recv_handler,
  //                   size_t buffer_size = 1000) :
  //   rpc(dc, this), send_buffers(dc.numprocs()), send_locks(dc.numprocs()),
  //   max_buffer_size(buffer_size), recv_handler(recv_handler) { rpc.barrier(); }

  /**
   * Sends a value to a target machine.
   * Use the send buffer owned by thread_id.
   */
  void send(const procid_t proc, const T& value, const size_t thread_id = 0) {
    ASSERT_LT(proc, rpc.numprocs());
    ASSERT_LT(thread_id, num_threads);
    // each (thread, target) pair owns a distinct buffer and lock
    const size_t index = thread_id * rpc.numprocs() + proc;
    ASSERT_LT(index, send_locks.size());
    send_locks[index].lock();
    (*(send_buffers[index].oarc)) << value;
    ++send_buffers[index].numinserts;
    if(send_buffers[index].oarc->off >= max_buffer_size) {
      // buffer is full: swap in a fresh archive while holding the lock,
      // then finish the actual network send outside the lock
      oarchive* prevarc = swap_buffer(index);
      send_locks[index].unlock();
      // complete the send
      rpc.split_call_end(proc, prevarc);
    } else {
      send_locks[index].unlock();
    }
  } // end of send

  /**
   * Flushes the send buffer owned by thread_id.
   */
  void partial_flush(size_t thread_id) {
    for(procid_t proc = 0; proc < rpc.numprocs(); ++proc) {
      const size_t index = thread_id * rpc.numprocs() + proc;
      ASSERT_LT(proc, rpc.numprocs());
      // NOTE(review): numinserts is read here before acquiring the lock;
      // a concurrent send() on the same slot could race with this check —
      // confirm each thread_id's buffers are only touched by that thread.
      if (send_buffers[index].numinserts > 0) {
        send_locks[index].lock();
        oarchive* prevarc = swap_buffer(index);
        send_locks[index].unlock();
        // complete the send
        rpc.split_call_end(proc, prevarc);
        rpc.dc().flush_soon(proc);
      }
    }
  }

  /**
   * Flushes all send buffers. Must be called only on one thread.
   * Will not return until all machines call flush.
   */
  void flush() {
    for(size_t i = 0; i < send_buffers.size(); ++i) {
      const procid_t proc = i % rpc.numprocs();
      ASSERT_LT(proc, rpc.numprocs());
      send_locks[i].lock();
      if (send_buffers[i].numinserts > 0) {
        oarchive* prevarc = swap_buffer(i);
        // complete the send
        rpc.split_call_end(proc, prevarc);
      }
      send_locks[i].unlock();
    }
    rpc.dc().flush_soon();
    // wait until every machine has flushed and all in-flight sends landed
    rpc.full_barrier();
  } // end of flush

  /**
   * Returns a collection of T sent by ret_proc.
   *
   * \param ret_proc On successful return, will contain a valid procid indicating
   *        that the values in the buffer were sent by that process.
   * \param ret_buffer A sequence of values sent by ret_proc
   * \param try_lock If true, will not acquire the lock if the lock is
   *        contended. Useful for polling the receive buffer
   *        while sending is occurring.
   * \return True on success and there are values in the buffer.
   *         False if the receive buffer is empty. Or if try_lock is set,
   *         False may also indicate the buffer lock is being contended.
   */
  bool recv(procid_t& ret_proc, buffer_type& ret_buffer,
            const bool try_lock = false) {
    fiber_control::fast_yield();
    dc_impl::blob read_buffer;
    bool has_lock = false;
    if(try_lock) {
      // cheap unlocked emptiness probe before contending the lock
      if (recv_buffers.empty()) return false;
      has_lock = recv_lock.try_lock();
    } else {
      recv_lock.lock();
      has_lock = true;
    }
    bool success = false;
    if(has_lock) {
      if(!recv_buffers.empty()) {
        success = true;
        buffer_record& rec = recv_buffers.front();
        // read the record
        ret_proc = rec.proc;
        ret_buffer.swap(rec.buffer);
        ASSERT_LT(ret_proc, rpc.numprocs());
        recv_buffers.pop_front();
      }
      recv_lock.unlock();
    }
    return success;
  } // end of recv

  /**
   * Returns the number of elements available for receiving.
   */
  size_t size() const {
    typedef typename std::deque< buffer_record >::const_iterator iterator;
    recv_lock.lock();
    size_t count = 0;
    foreach(const buffer_record& rec, recv_buffers) {
      count += rec.buffer.size();
    }
    recv_lock.unlock();
    return count;
  } // end of size

  /**
   * Returns true if there are no elements available for receiving.
   * NOTE(review): reads recv_buffers without recv_lock, so under concurrent
   * receives the result is only a hint — confirm callers treat it as such.
   */
  bool empty() const { return recv_buffers.empty(); }

  void clear() { }

  void barrier() { rpc.barrier(); }

 private:
  // Split-call receive handler: deserializes one batch sent by a peer.
  // Wire layout (written by the ctor / swap_buffer / split_call_end):
  //   [src procid][serialized elements...][element count as trailing size_t]
  void rpc_recv(size_t len, wild_pointer w) {
    buffer_type tmp;
    iarchive iarc(reinterpret_cast<const char*>(w.ptr), len);
    // first deserialize the source process
    procid_t src_proc;
    iarc >> src_proc;
    ASSERT_LT(src_proc, rpc.numprocs());
    // create an iarchive which just points to the last size_t bytes
    // to get the number of elements
    iarchive numel_iarc(reinterpret_cast<const char*>(w.ptr) + len - sizeof(size_t),
                        sizeof(size_t));
    size_t numel = 0;
    numel_iarc.read(reinterpret_cast<char*>(&numel), sizeof(size_t));
    //std::cout << "Receiving: " << numel << "\n";
    tmp.resize(numel);
    for (size_t i = 0;i < numel; ++i) {
      iarc >> tmp[i];
    }
    recv_lock.lock();
    recv_buffers.push_back(buffer_record());
    buffer_record& rec = recv_buffers.back();
    rec.proc = src_proc;
    rec.buffer.swap(tmp);
    recv_lock.unlock();
  } // end of rpc rcv

  // create a new buffer for send_buffer[index], returning the old buffer
  // (caller must hold send_locks[index])
  oarchive* swap_buffer(size_t index) {
    oarchive* swaparc = rpc.split_call_begin(&buffered_exchange::rpc_recv);
    std::swap(send_buffers[index].oarc, swaparc);
    // write the length at the end of the buffer we are returning
    (*swaparc).write(reinterpret_cast<char*>(&send_buffers[index].numinserts),
                     sizeof(size_t));
    //std::cout << "Sending : " << (send_buffers[index].numinserts)<< "\n";
    // reset the insertion count
    send_buffers[index].numinserts = 0;
    // write the current procid into the new buffer
    (*(send_buffers[index].oarc)) << rpc.procid();
    return swaparc;
  }
}; // end of buffered exchange

}; // end of graphlab namespace
#include <graphlab/macros_undef.hpp>
#endif

================================================ FILE: src/graphlab/rpc/caching_dht.hpp ================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS
* IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*
* For more about this software visit:
*
*     http://www.graphlab.ml.cmu.edu
*
*/

/* \author Yucheng Low (ylow), Joseph Gonzalez (jegonzal)

   An implementation of a distributed integer -> integer map with caching
   capabilities. */

#ifndef GRAPHLAB_CACHING_DHT_HPP
#define GRAPHLAB_CACHING_DHT_HPP
#include <boost/unordered_map.hpp>
#include <boost/intrusive/list.hpp>
#include <boost/functional/hash.hpp>

#include <graphlab/rpc/dc.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/util/synchronized_unordered_map.hpp>
#include <graphlab/util/dense_bitset.hpp>

namespace graphlab {
namespace dc_impl {

/**
 * \internal
 * \ingroup rpc
 * A cache entry for the caching_dht.
 * Boost intrusive is used to provide the LRU capabilities here.
 */
template<typename KeyType, typename ValueType>
class lru_list {
 public:
  KeyType key;      /// the key associated with this cache entry
  ValueType value;  /// the value associated with this cache entry

  // auto_unlink link mode lets an entry remove itself from the intrusive
  // list when destroyed, without needing a reference to the list itself
  typedef boost::intrusive::list_member_hook<
      boost::intrusive::link_mode<boost::intrusive::auto_unlink> >
      lru_member_hook_type;

  lru_member_hook_type member_hook_;

  ~lru_list() { }

  explicit lru_list(const KeyType& k = KeyType(),
                    const ValueType &v = ValueType()) : key(k), value(v) { }
};

} // namespace dc_impl

/**
 * \internal
 * \ingroup rpc
 * This implements a limited distributed key -> value map with caching
 * capabilities. It is up to the user to determine cache invalidation policies.
User explicitly calls the invalidate() function to clear local cache entries
 */
template<typename KeyType, typename ValueType>
class caching_dht{
 public:
  typedef dc_impl::lru_list<KeyType, ValueType> lru_entry_type;
  /// datatype of the data map
  typedef boost::unordered_map<KeyType, ValueType> map_type;
  /// datatype of the local cache map
  typedef boost::unordered_map<KeyType, lru_entry_type* > cache_type;

  typedef boost::intrusive::member_hook<lru_entry_type,
                                        typename lru_entry_type::lru_member_hook_type,
                                        &lru_entry_type::member_hook_> MemberOption;
  /// datatype of the intrusive LRU list embedded in the cache map
  typedef boost::intrusive::list<lru_entry_type,
                                 MemberOption,
                                 boost::intrusive::constant_time_size<false> >
      lru_list_type;

 private:
  mutable dc_dist_object<caching_dht<KeyType, ValueType> > rpc;

  mutex datalock;
  map_type data;                 /// The actual table data that is distributed

  mutex cachelock;               /// lock for the cache datastructures
  mutable cache_type cache;      /// The cache table
  mutable lru_list_type lruage;  /// The LRU linked list associated with the cache

  procid_t numprocs;             /// Number of processors
  size_t maxcache;               /// Maximum cache size allowed

  // cache statistics.
  // NOTE(review): incremented without holding cachelock (see get_cached),
  // so the counts are approximate under concurrency — confirm acceptable.
  mutable size_t reqs;
  mutable size_t misses;

  boost::hash<KeyType> hasher;

 public:
  /// Constructor. Creates the integer map.
  caching_dht(distributed_control &dc,
              size_t max_cache_size = 1024):rpc(dc, this),data(11) {
    cache.rehash(max_cache_size);
    maxcache = max_cache_size;
    logger(LOG_INFO, "%d Creating distributed_hash_table. Cache Limit = %d",
           dc.procid(), maxcache);
    reqs = 0;
    misses = 0;
  }

  ~caching_dht() {
    data.clear();
    // cache values are heap-allocated lru_entry_type; delete each one
    // (the auto_unlink hook removes each entry from lruage on delete)
    typename cache_type::iterator i = cache.begin();
    while (i != cache.end()) {
      delete i->second;
      ++i;
    }
    cache.clear();
  }

  /// Sets the key to the value
  void set(const KeyType& key, const ValueType &newval) {
    // ownership is determined by hashing the key over all processes
    size_t hashvalue = hasher(key);
    size_t owningmachine = hashvalue % rpc.dc().numprocs();
    if (owningmachine == rpc.dc().procid()) {
      datalock.lock();
      data[key] = newval;
      datalock.unlock();
    } else {
      // forward to the owner, and optimistically cache the value locally
      rpc.remote_call(owningmachine,
                      &caching_dht<KeyType,ValueType>::set,
                      key, newval);
      update_cache(key, newval);
    }
  }

  /** Gets the value associated with the key. returns true on success.. */
  std::pair<bool, ValueType> get(const KeyType &key) const {
    // figure out who owns the key
    size_t hashvalue = hasher(key);
    size_t owningmachine = hashvalue % rpc.dc().numprocs();
    std::pair<bool, ValueType> ret;
    // if I own the key, get it from the map table
    if (owningmachine == rpc.dc().procid()) {
      datalock.lock();
      typename map_type::const_iterator iter = data.find(key);
      if (iter == data.end()) {
        ret.first = false;
      } else {
        ret.first = true;
        ret.second = iter->second;
      }
      datalock.unlock();
    } else {
      // remote fetch; keep the local cache coherent with the result
      ret = rpc.remote_request(owningmachine,
                               &caching_dht<KeyType,ValueType>::get,
                               key);
      if (ret.first) update_cache(key, ret.second);
      else invalidate(key);
    }
    return ret;
  }

  /** Gets the value associated with the key, reading from cache if available
      Note that the cache may be out of date. */
  std::pair<bool, ValueType> get_cached(const KeyType &key) const {
    // if this is to my current machine, just get it and don't go to cache
    size_t hashvalue = hasher(key);
    size_t owningmachine = hashvalue % rpc.dc().numprocs();
    if (owningmachine == rpc.dc().procid()) return get(key);
    reqs++;
    cachelock.lock();
    // check if it is in the cache
    typename cache_type::iterator i = cache.find(key);
    if (i == cache.end()) {
      // nope. not in cache. Call the regular get
      cachelock.unlock();
      misses++;
      return get(key);
    } else {
      // yup. in cache. return the value
      std::pair<bool, ValueType> ret;
      ret.first = true;
      ret.second = i->second->value;
      // shift the cache entry to the head of the LRU list
      lruage.erase(lru_list_type::s_iterator_to(*(i->second)));
      lruage.push_front(*(i->second));
      cachelock.unlock();
      return ret;
    }
  }

  /// Invalidates the cache entry associated with this key
  void invalidate(const KeyType &key) const{
    cachelock.lock();
    // is the key I am invalidating in the cache?
    typename cache_type::iterator i = cache.find(key);
    if (i != cache.end()) {
      // drop it from the lru list
      // (deleting unlinks it from lruage via the auto_unlink hook)
      delete i->second;
      cache.erase(i);
    }
    cachelock.unlock();
  }

  // NOTE(review): divides by reqs without a zero check — returns NaN when
  // no get_cached() calls have been made. Confirm callers tolerate this.
  double cache_miss_rate() { return double(misses) / double(reqs); }

  size_t num_gets() const { return reqs; }
  size_t num_misses() const { return misses; }

  size_t cache_size() const { return cache.size(); }

 private:
  /// Updates the cache with this new value
  void update_cache(const KeyType &key, const ValueType &val) const{
    cachelock.lock();
    typename cache_type::iterator i = cache.find(key);
    // create a new entry
    if (i == cache.end()) {
      cachelock.unlock();
      // if we are out of room, remove the lru entry
      // NOTE(review): cachelock is dropped and re-acquired around
      // remove_lru(), so a concurrent thread may mutate the cache in
      // between — confirm this race is benign (cache may briefly exceed
      // maxcache or evict an extra entry).
      if (cache.size() >= maxcache) remove_lru();
      cachelock.lock();
      // insert the element, remember the iterator so we can push it
      // straight to the LRU list
      std::pair<typename cache_type::iterator, bool> ret =
          cache.insert(std::make_pair(key, new lru_entry_type(key, val)));
      if (ret.second) lruage.push_front(*(ret.first->second));
    } else {
      // modify entry in place
      i->second->value = val;
      // swap to front of list
      //boost::swap_nodes(lru_list_type::s_iterator_to(i->second), lruage.begin());
      lruage.erase(lru_list_type::s_iterator_to(*(i->second)));
      lruage.push_front(*(i->second));
    }
    cachelock.unlock();
  }

  /// Removes the least recently used element from the cache
  void remove_lru() const{
    cachelock.lock();
    KeyType keytoerase = lruage.back().key;
    // is the key I am invalidating in the cache?
    typename cache_type::iterator i = cache.find(keytoerase);
    if (i != cache.end()) {
      // drop it from the lru list
      delete i->second;
      cache.erase(i);
    }
    cachelock.unlock();
  }
};

}
#endif

================================================ FILE: src/graphlab/rpc/circular_char_buffer.cpp ================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#include <cstdlib>
#include <cstring>
#include <graphlab/logger/assertions.hpp>
#include <graphlab/rpc/circular_char_buffer.hpp>

namespace graphlab {

// Constructs an empty buffer with at least 'initial' bytes of capacity
// (a 4 byte minimum is enforced).
circular_char_buffer::circular_char_buffer(std::streamsize initial) {
  initial = std::max<size_t>((size_t)initial, 4);
  // allocate something to start with
  buffer = (char*)malloc(initial);
  head = 0;
  tail = 0;
  bufsize = initial;
  len = 0;
}

// Copy constructor: allocates a fresh linear buffer and copies src's
// contents into it (the copy is always un-wrapped: head == 0).
circular_char_buffer::circular_char_buffer(const circular_char_buffer &src) {
  // allocate minimum of 4 bytes
  bufsize = std::max<size_t>(src.size(), 4);
  buffer = (char*)malloc(bufsize);
  // copy the buffer in src
  src.peek(buffer, src.size());
  // set the lengths
  len = src.size();
  tail = src.size();
  head = 0;
  if (tail == bufsize) tail = 0;
}

// Assignment: clears this buffer, grows it if needed, then copies src.
// NOTE(review): self-assignment clears the buffer, since clear() zeroes
// len before src.size() is read — confirm callers never self-assign.
circular_char_buffer&
circular_char_buffer::operator=(const circular_char_buffer& src) {
  // reset head and tail
  clear();
  // make sure we have enough room
  reserve(src.size());
  src.peek(buffer, src.size());
  len = src.size();
  tail = src.size();
  if (tail == bufsize) tail = 0;
  return *this;
}

// Grows capacity to at least s bytes; never shrinks.
void circular_char_buffer::reserve(std::streamsize s) {
  // minimum of 4 bytes. disallow reserve of 0
  if (s <= 4) s = 4;
  // do nothing if s is smaller than the current buffer size
  if (s <= bufsize) return;
  // do a reallocation
  // NOTE(review): realloc return value is unchecked — on allocation
  // failure this assigns NULL and loses the old buffer.
  buffer = (char*)realloc((void*)buffer, s);
  // now, we need to be careful to reposition the head and tail
  // there are 2 cases
  // case 1: no loop around,
  //         Easiest case. do nothing. just update bufsize
  // case 2: we have a loop around,
  //         copy the left side of the loop around to the end.
  if (tail >= head) {
    bufsize = s;
  } else {
    // how much excess space do we have now?
    size_t excess = (size_t)s - bufsize;
    // move up to excess bytes to the end
    size_t movebytes = std::min<size_t>(tail, excess);
    memcpy(buffer + bufsize, buffer, movebytes);
    // move the remaining bytes to the start of the buffer
    memmove(buffer, buffer+movebytes, tail - movebytes);
    // update buftail
    // if movebytes == tail, then tail has been wiped out
    // and it is no longer a looparound
    bufsize = s;
    tail = (head + len) % bufsize;
  }
  consistency_check();
}

// Shrinks capacity down to the current contents (plus one spare byte).
void circular_char_buffer::squeeze() {
  // squeeze to a minimum of 4 bytes
  if (bufsize <= 4) return;
  // 2 cases
  // case 1: no loop around
  // case 2: loop around. Easiest solution is to allocate a new buffer
  if (tail >= head) {
    if (head > 0) memmove(buffer, buffer+head, len);
    std::streamsize efflen = std::max(len + 1, std::streamsize(4));
    buffer = (char*)realloc((void*)buffer, efflen);
    head = 0;
    tail = len;
    bufsize = efflen;
  } else {
    // allocate a new buffer
    std::streamsize efflen = std::max(len + 1, std::streamsize(4));
    char *newbuf = (char*)malloc(efflen);
    // read into this buffer
    peek(newbuf, len);
    // free the old buffer
    free(buffer);
    buffer = newbuf;
    head = 0;
    tail = len;
    bufsize = efflen;
  }
  consistency_check();
}

// Rotates the contents so head == 0; capacity is preserved.
void circular_char_buffer::align() {
  // squeeze to a minimum of 4 bytes
  if (bufsize <= 4) return;
  // 2 cases
  // case 1: no loop around
  // case 2: loop around. Easiest solution is to allocate a new buffer
  if (tail >= head) {
    if (head > 0) memmove(buffer, buffer+head, len);
    head = 0;
    tail = len;
  } else {
    // allocate a new buffer
    char *newbuf = (char*)malloc(bufsize);
    // read into this buffer
    peek(newbuf, len);
    // free the old buffer
    free(buffer);
    buffer = newbuf;
    head = 0;
    tail = len;
  }
}

// Returns true iff align() would need to allocate (i.e. the data wraps).
bool circular_char_buffer::align_requires_alloc() {
  // squeeze to a minimum of 4 bytes
  if (bufsize <= 4) return false;
  // 2 cases
  // case 1: no loop around
  // case 2: loop around. Easiest solution is to allocate a new buffer
  if (tail >= head) {
    return false;
  } else {
    return true;
  }
}

// Non-destructive read of up to clen bytes; returns the bytes copied.
std::streamsize circular_char_buffer::peek(char* c,
                                           std::streamsize clen) const {
  std::streamsize readlen = std::min(clen, len);
  // eliminate the case where head == tail, but buffer is empty
  if (readlen == 0) return 0;
  // first copy from head to end of buffer
  std::streamsize firstcopy = std::min(bufsize - head, readlen);
  memcpy(c, buffer+head, firstcopy);
  if (firstcopy == readlen) return readlen;
  // second copy from beginning of buffer to tail
  std::streamsize secondcopy = std::min(tail, readlen - firstcopy);
  memcpy(c+firstcopy, buffer, secondcopy);
  consistency_check();
  return readlen;
}

// Discards up to clen bytes; returns the number actually skipped.
std::streamsize circular_char_buffer::skip(std::streamsize clen) {
  std::streamsize readlen = std::min(clen, len);
  head += readlen;
  if (head >= bufsize) head -= bufsize;
  len -= readlen;
  consistency_check();
  return readlen;
}

// Destructive read: peek then skip. Returns -1 if the buffer is empty.
std::streamsize circular_char_buffer::read(char* c, std::streamsize clen) {
  if (len == 0) return -1;
  std::streamsize readlen = peek(c, clen);
  skip(readlen);
  consistency_check();
  return readlen;
}

// String overload of peek().
// NOTE(review): c is resized to clen before reading; if fewer bytes are
// available the string keeps length clen with a zero-filled tail —
// callers must rely on the returned count, not c.size().
std::streamsize circular_char_buffer::peek(std::string &c,
                                           std::streamsize clen) const{
  c.clear();
  c.resize(clen);
  return peek(const_cast<char*>(c.c_str()), clen);
}

// String overload of read(); same sizing caveat as the string peek().
std::streamsize circular_char_buffer::read(std::string &c,
                                           std::streamsize clen) {
  c.clear();
  c.resize(clen);
  return read(const_cast<char*>(c.c_str()), clen);
}

// Appends clen bytes, growing the buffer as needed. Always writes all
// clen bytes and returns clen.
std::streamsize
circular_char_buffer::write(const char* c, std::streamsize clen) {
  // if we do not have enough capacity.
  // make sure we have enough capacity
  // (+1 keeps one byte free so tail never catches up to head)
  reserve(clen + len + 1);
  len += clen;
  std::streamsize firstcopy = std::min(clen, bufsize - tail);
  memcpy(buffer + tail, c, firstcopy);
  tail += firstcopy;
  if (tail == bufsize) tail = 0;
  if (firstcopy == clen) {
    consistency_check();
    return clen;
  }
  std::streamsize secondcopy = clen - firstcopy;
  memcpy(buffer, c + firstcopy, secondcopy);
  tail += secondcopy;
  consistency_check();
  return clen;
}

// Empties the buffer; capacity is retained.
void circular_char_buffer::clear() {
  head = 0;
  tail = 0;
  len = 0;
}

circular_char_buffer::~circular_char_buffer() {
  free(buffer);
}

// Zero-copy read: exposes a pointer into the internal buffer and
// consumes the contiguous region starting at head.
std::streamsize circular_char_buffer::introspective_read(char* &s) {
  if (len == 0) {
    s = NULL;
    return 0;
  }
  s = buffer + head;
  // how much we do read?
  // we can go up to the end of the buffer, or until a looparound
  // case 1: no looparound
  // case 2: looparound
  std::streamsize readlen = 0;
  if (tail > head) {
    readlen = tail - head;
  } else {
    readlen = bufsize - head;
  }
  skip(readlen);
  return readlen;
}

// Zero-copy read bounded by clen bytes.
std::streamsize circular_char_buffer::introspective_read(char* &s,
                                                         std::streamsize clen) {
  if (len == 0) {
    s = NULL;
    return 0;
  }
  s = buffer + head;
  // how much we do read?
  // we can go up to the end of the buffer, or until a looparound
  // case 1: no looparound
  // case 2: looparound
  std::streamsize readlen = 0;
  if (tail > head) {
    readlen = tail - head;
  } else {
    readlen = bufsize - head;
  }
  if (clen < readlen) readlen = clen;
  skip(readlen);
  return readlen;
}

// Zero-copy write: exposes the contiguous free region at tail and
// returns its length; one byte is kept free so tail never equals head
// on a full buffer. Callers commit with advance_write().
std::streamsize circular_char_buffer::introspective_write(char* &s) {
  s = buffer + tail;
  if (tail >= head) {
    // case 1. no looparound.
    return bufsize - tail - (head==0);
  } else {
    // case 2 looparound
    return head - tail - 1;
  }
}

// Commits bytes previously written through introspective_write().
void circular_char_buffer::advance_write(std::streamsize bytes) {
  tail += bytes;
  if (tail >= bufsize) tail -= bufsize;
  len += bytes;
  consistency_check();
}

};

================================================ FILE: src/graphlab/rpc/circular_char_buffer.hpp ================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_CIRCULAR_CHAR_BUFFER_HPP
#define GRAPHLAB_CIRCULAR_CHAR_BUFFER_HPP
#include <string>
#include <iostream>
#include <graphlab/logger/assertions.hpp>
#include <boost/iostreams/stream.hpp>
#include <boost/iostreams/categories.hpp>

namespace graphlab {

/**
 * \ingroup rpc
 * \internal
 * A self-resizing circular buffer of characters
 */
class circular_char_buffer {
 public:
  /// Creates a circular buffer with some initial capacity
  circular_char_buffer(std::streamsize initialsize = 1024);

  /// copy constructor
  circular_char_buffer(const circular_char_buffer &src);

  /// assignment operator
  circular_char_buffer& operator=(const circular_char_buffer& src);

  /// destructors
  ~circular_char_buffer();

  /// writes len bytes into the buffer
  std::streamsize write(const char* c, std::streamsize clen);

  /** tries to peek up to 'len' bytes from the buffer.
      the actual number of bytes read will be returned.
This is a non-destructive operation */
  std::streamsize peek(char* c, std::streamsize clen) const;

  /** reads up to 'len' bytes from the buffer.
      the actual number of bytes read will be returned.
      This is a destructive operation */
  std::streamsize read(char* c, std::streamsize clen);

  /** tries to peek up to 'len' bytes from the buffer.
      the actual number of bytes read will be returned.
      This is a non-destructive operation. string overload of peek() */
  std::streamsize peek(std::string &s, std::streamsize clen) const;

  /** reads up to 'len' bytes from the buffer.
      the actual number of bytes read will be returned.
      This is a destructive operation. string overload of read() */
  std::streamsize read(std::string &s, std::streamsize clen);

  /** skip some number of input bytes.
      Returns the number of bytes actually skipped */
  std::streamsize skip(std::streamsize clen);

  /** reserves at least s bytes of capacity. Tries to perform as few
      memory copies as possible. No change is made if s is smaller
      than the current capacity. */
  void reserve(std::streamsize s);

  /** Squeezes out all empty capacity in the buffer so that the
      capacity is the same as the length of the buffer */
  void squeeze();

  /** Rotates the buffer so that the head is at index 0.
      buffer reserved size is preserved */
  void align();

  /** Returns true if realignment requires a reallocation */
  bool align_requires_alloc();

  /**
   * Returns a pointer (through s) and a length of the read. This
   * pointer is a direct pointer into the internal buffer of this
   * datastructure. The pointer is valid as long as no other
   * operations are performed on this structure. The length of the
   * introspective_read may be less than the actual length of the
   * buffer. Multiple calls to introspective_read may be necessary
   * to read all data in the buffer. If the function returns 0, the
   * buffer is empty.
   */
  std::streamsize introspective_read(char* &s);

  /**
   * Returns a pointer (through s) and a length of the read. This
   * pointer is a direct pointer into the internal buffer of this
   * datastructure. The pointer is valid as long as no other
   * operations are performed on this structure. The length of the
   * introspective_read may be less than the number of bytes
   * requested. Multiple calls to introspective_read may be
   * necessary to read all data in the buffer. If the function
   * returns 0, the buffer is empty.
   */
  std::streamsize introspective_read(char* &s, std::streamsize clen);

  /** Returns a pointer to the next empty region of the buffer.
      The return value is the maximum contigious length writable.
      When writes complete, advance_write should be used to integrate
      the written bytes */
  std::streamsize introspective_write(char* &s);

  // commits 'bytes' previously written through introspective_write()
  void advance_write(std::streamsize bytes);

  // debug invariant checks; currently compiled out
  inline void consistency_check() const {
    /*
    ASSERT_GE(head, 0);
    ASSERT_GE(tail, 0);
    ASSERT_LT(head, bufsize);
    ASSERT_LE(tail, bufsize);
    if (tail > head) ASSERT_EQ(tail - head, len);
    else if (head < tail) ASSERT_EQ(head + bufsize - tail, len);
    else if (head == tail) ASSERT_EQ(len, 0);
    */
  }

  /** clears the stream */
  void clear();

  /** Gets the number of characters in the stream */
  inline std::streamsize size() const {
    return len;
  }

  /** Gets the size of the buffer.
      \note: The useable space is reserved_size() - 1 */
  inline std::streamsize reserved_size() const {
    return bufsize;
  }

 private:
  inline bool buffer_full() const {
    return len == bufsize;
  }

  char* buffer;

  /**
   * points to the head of the queue.
   * Reader reads from here
   */
  std::streamsize head;

  /**
   * points to one past the end of the queue.
   * writer writes to here. if tail == head, buffer must be empty
   */
  std::streamsize tail;

  std::streamsize bufsize;  // current size of the buffer
  std::streamsize len;      // number of bytes stored in the buffer
};

/** A boost source device which can attach to a circular buffer */
struct circular_char_buffer_source {
  circular_char_buffer_source(circular_char_buffer &buf,
                              size_t maxlen = size_t(-1)):buf(buf), maxlen(maxlen) { }

  circular_char_buffer &buf;
  size_t maxlen;  // remaining byte quota this source may deliver

  typedef char char_type;
  struct category : public boost::iostreams::source_tag { };

  /** to satisfy the optimally buffered tag. Since this is an
      in-memory buffer. the optimal buffer size (for any wrapping
      stream) is 0. */
  inline std::streamsize optimal_buffer_size() const { return 0; }

  // reads up to n bytes, bounded by the remaining maxlen quota;
  // returns -1 (EOF) once the quota is exhausted
  inline std::streamsize read(char* s, std::streamsize n) {
    if ((size_t)(n) > maxlen) n = (std::streamsize)maxlen;
    maxlen -= (size_t)n;
    if (n == 0) return -1;
    else return buf.read(s, n);
  }
};

/** A boost sink device which can attach to a circular buffer */
struct circular_char_buffer_sink {
  circular_char_buffer_sink(circular_char_buffer &buf):buf(buf) { }

  circular_char_buffer &buf;

  typedef char char_type;
  struct category: public boost::iostreams::sink_tag,
                   public boost::iostreams::multichar_tag { };

  /** to satisfy the optimally buffered tag. Since this is an
      in-memory buffer. the optimal buffer size is 0. */
  inline std::streamsize optimal_buffer_size() const { return 0; }

  inline std::streamsize write(const char* s, std::streamsize n) {
    return buf.write(s, n);
  }
};

struct circular_char_buffer_device {
  circular_char_buffer_device(circular_char_buffer &buf):buf(buf) { }

  circular_char_buffer &buf;

  typedef char char_type;
  struct category : public boost::iostreams::bidirectional_device_tag,
                    public boost::iostreams::optimally_buffered_tag{ };

  /** to satisfy the optimally buffered tag. Since this is an
      in-memory buffer. the optimal buffer size is 0.
*/ inline std::streamsize optimal_buffer_size() const { return 0; } inline std::streamsize write(const char* s, std::streamsize n) { return buf.write(s, n); } inline std::streamsize read(char* s, std::streamsize n) { return buf.read(s, n); } }; } #endif ================================================ FILE: src/graphlab/rpc/circular_iovec_buffer.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_RPC_CIRCULAR_IOVEC_BUFFER_HPP #define GRAPHLAB_RPC_CIRCULAR_IOVEC_BUFFER_HPP #include <vector> #include <sys/socket.h> namespace graphlab{ namespace dc_impl { /** * \ingroup rpc * \internal * A circular buffer which maintains a parallel sequence of iovecs. * One sequence is basic iovecs * The other sequence is used for storing the original unomidifed pointers * This is minimally checked. 
length must be a power of 2 */ struct circular_iovec_buffer { inline circular_iovec_buffer(size_t len = 4096) { v.resize(4096); parallel_v.resize(4096); head = 0; tail = 0; numel = 0; } inline bool empty() const { return numel == 0; } size_t size() const { return numel; } void reserve(size_t _n) { if (_n <= v.size()) return; size_t originalsize = v.size(); size_t n = v.size(); // must be a power of 2 while (n <= _n) n *= 2; v.resize(n); parallel_v.resize(n); if (head >= tail && numel > 0) { // there is a loop around // we need to fix the shift size_t newtail = originalsize; for (size_t i = 0;i < tail; ++i) { v[newtail] = v[i]; parallel_v[newtail] = parallel_v[i]; ++newtail; } tail = newtail; } } inline void write(const std::vector<iovec>& other, size_t nwrite) { reserve(numel + nwrite); for (size_t i = 0;i < nwrite; ++i) { v[tail] = other[i]; parallel_v[tail] = other[i]; tail = (tail + 1) & (v.size() - 1); } numel += nwrite; } /** * Writes an entry into the buffer, resizing the buffer if necessary. * This buffer will take over all iovec pointers and free them when done */ inline void write(const iovec &entry) { if (numel == v.size()) { reserve(2 * numel); } v[tail] = entry; parallel_v[tail] = entry; tail = (tail + 1) & (v.size() - 1); ++numel; } /** * Writes an entry into the buffer, resizing the buffer if necessary. * This buffer will take over all iovec pointers and free them when done. * This version of write allows the iovec that is sent to be different from the * iovec that is freed. (for instance, what is sent could be subarray of * what is to be freed. 
*/ inline void write(const iovec &entry, const iovec& actual_ptr_entry) { if (numel == v.size()) { reserve(2 * numel); } v[tail] = actual_ptr_entry; parallel_v[tail] = entry; tail = (tail + 1) & (v.size() - 1); ++numel; } /** * Erases a single iovec from the head and free the pointer */ inline void erase_from_head_and_free() { free(v[head].iov_base); head = (head + 1) & (v.size() - 1); --numel; } /** * Fills a msghdr for unsent data. */ void fill_msghdr(struct msghdr& data) { data.msg_iov = &(parallel_v[head]); if (head < tail) { data.msg_iovlen = tail - head; } else { data.msg_iovlen = v.size() - head; } data.msg_iovlen = std::min<size_t>(IOV_MAX, data.msg_iovlen); } /** * Advances the head as if some amount of data was sent. */ void sent(size_t len) { while(len > 0) { size_t curv_sent_len = std::min(len, parallel_v[head].iov_len); parallel_v[head].iov_len -= curv_sent_len; parallel_v[head].iov_base = (char*)(parallel_v[head].iov_base) + curv_sent_len; len -= curv_sent_len; if (parallel_v[head].iov_len == 0) { erase_from_head_and_free(); } } } std::vector<struct iovec> v; std::vector<struct iovec> parallel_v; size_t head; size_t tail; size_t numel; }; } } #endif ================================================ FILE: src/graphlab/rpc/dc.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#include <unistd.h>
#include <sys/param.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <ifaddrs.h>
#include <netinet/in.h>
#include <map>
#include <sstream>
#include <boost/unordered_map.hpp>
#include <boost/bind.hpp>
//#include <graphlab/logger/assertions.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/util/stl_util.hpp>
#include <graphlab/util/net_util.hpp>
#include <graphlab/util/mpi_tools.hpp>
#include <graphlab/rpc/dc.hpp>
#include <graphlab/rpc/dc_tcp_comm.hpp>
//#include <graphlab/rpc/dc_sctp_comm.hpp>
#include <graphlab/rpc/dc_buffered_stream_send2.hpp>
#include <graphlab/rpc/dc_stream_receive.hpp>
#include <graphlab/rpc/request_reply_handler.hpp>
#include <graphlab/rpc/dc_services.hpp>
#include <graphlab/rpc/dc_init_from_env.hpp>
#include <graphlab/rpc/dc_init_from_mpi.hpp>
#include <graphlab/rpc/dc_init_from_zookeeper.hpp>

namespace graphlab {

namespace dc_impl {

// Process-wide thread-local-storage keys. The *_initialized flags guard
// one-time pthread_key_create calls (see distributed_control::init).
bool thrlocal_sequentialization_key_initialized = false;
pthread_key_t thrlocal_sequentialization_key;

bool thrlocal_send_buffer_key_initialized = false;
pthread_key_t thrlocal_send_buffer_key;

// pthread TLS destructor: frees a thread's send buffer at thread exit.
void thrlocal_send_buffer_key_deleter(void* p) {
  if (p != NULL) {
    thread_local_buffer* buf = (thread_local_buffer*)(p);
    if (buf != NULL) {
      delete buf;
    }
  }
}

} // namespace dc_impl

// Static "last constructed instance" handles used by the singleton-style
// accessors below. Cleared in the destructor.
procid_t distributed_control::last_dc_procid = 0;
distributed_control* distributed_control::last_dc = NULL;

// Returns the procid of the most recently constructed distributed_control.
procid_t distributed_control::get_instance_procid() {
  return last_dc_procid;
}

// Returns the most recently constructed distributed_control, or NULL.
distributed_control* distributed_control::get_instance() {
  return last_dc;
}

// Sets this thread's sequentialization key to 'newkey' and returns the
// previous key. Keys are stored in TLS as a small integer (< 256).
unsigned char distributed_control::set_sequentialization_key(unsigned char newkey) {
  size_t oldval = reinterpret_cast<size_t>(pthread_getspecific(dc_impl::thrlocal_sequentialization_key));
  size_t newval = newkey;
  pthread_setspecific(dc_impl::thrlocal_sequentialization_key,
                      reinterpret_cast<void*>(newval));
  assert(oldval < 256);
  return (unsigned char)oldval;
}

// Advances this thread's sequentialization key to (old + 1) mod 256 and
// returns the OLD key value.
unsigned char distributed_control::new_sequentialization_key() {
  size_t oldval = reinterpret_cast<size_t>(pthread_getspecific(dc_impl::thrlocal_sequentialization_key));
  size_t newval = (oldval + 1) % 256;
  pthread_setspecific(dc_impl::thrlocal_sequentialization_key,
                      reinterpret_cast<void*>(newval));
  assert(oldval < 256);
  return (unsigned char)oldval;
}

// Reads this thread's current sequentialization key without changing it.
unsigned char distributed_control::get_sequentialization_key() {
  size_t oldval = reinterpret_cast<size_t>(pthread_getspecific(dc_impl::thrlocal_sequentialization_key));
  assert(oldval < 256);
  return (unsigned char)oldval;
}

// Default constructor: discovers init parameters from (in priority order)
// environment variables, zookeeper, then MPI; otherwise falls back to a
// single-process "shared memory" configuration bound to a free local TCP
// port.
distributed_control::distributed_control() {
  dc_init_param initparam;
  if (init_param_from_env(initparam)) {
    logstream(LOG_INFO) << "Distributed Control Initialized from Environment" << std::endl;
  } else if (init_param_from_zookeeper(initparam)) {
    logstream(LOG_INFO) << "Distributed Control Initialized from Zookeeper" << std::endl;
  } else if (mpi_tools::initialized() && init_param_from_mpi(initparam)) {
    logstream(LOG_INFO) << "Distributed Control Initialized from MPI" << std::endl;
  } else {
    logstream(LOG_INFO) << "Shared Memory Execution" << std::endl;
    // get a port and socket
    std::pair<size_t, int> port_and_sock = get_free_tcp_port();
    size_t port = port_and_sock.first;
    int sock = port_and_sock.second;
    initparam.machines.push_back(std::string("localhost:") + tostr(port));
    initparam.curmachineid = 0;
    // pass the already-bound listening socket through the init string
    initparam.initstring = std::string(" __sockhandle__=") + tostr(sock) + " ";
    initparam.numhandlerthreads = RPC_DEFAULT_NUMHANDLERTHREADS;
    initparam.commtype = RPC_DEFAULT_COMMTYPE;
  }
  init(initparam.machines, initparam.initstring,
       initparam.curmachineid, initparam.numhandlerthreads,
       initparam.commtype);
  INITIALIZE_TRACER(dc_receive_queuing, "dc: time spent on enqueue");
  INITIALIZE_TRACER(dc_receive_multiplexing, "dc: time spent exploding a chunk");
  INITIALIZE_TRACER(dc_call_dispatch, "dc: time spent issuing RPC calls");
}

// Constructor from explicit init parameters.
distributed_control::distributed_control(dc_init_param initparam) {
  init(initparam.machines, initparam.initstring,
       initparam.curmachineid, initparam.numhandlerthreads,
       initparam.commtype);
  INITIALIZE_TRACER(dc_receive_queuing, "dc: time spent on enqueue");
  INITIALIZE_TRACER(dc_receive_multiplexing, "dc: time spent exploding a chunk");
  INITIALIZE_TRACER(dc_call_dispatch, "dc: time spent issuing RPC calls");
}

// Destructor: globally synchronizes (full_barrier), then tears down in
// order: callbacks, senders (flush before close), comm, TLS keys,
// receivers, handler queues/threads, and finally the comm object.
distributed_control::~distributed_control() {
  // detach the instance
  last_dc = NULL;
  last_dc_procid = 0;
  distributed_services->full_barrier();
  logstream(LOG_INFO) << "Shutting down distributed control " << std::endl;
  FREE_CALLBACK_EVENT(EVENT_NETWORK_BYTES);
  FREE_CALLBACK_EVENT(EVENT_RPC_CALLS);
  // call all deletion callbacks
  for (size_t i = 0; i < deletion_callbacks.size(); ++i) {
    deletion_callbacks[i]();
  }
  size_t bytessent = bytes_sent();
  for (size_t i = 0;i < senders.size(); ++i) {
    senders[i]->flush();
  }
  comm->close();
  for (size_t i = 0;i < senders.size(); ++i) {
    delete senders[i];
  }
  senders.clear();
  pthread_key_delete(dc_impl::thrlocal_sequentialization_key);
  pthread_key_delete(dc_impl::thrlocal_send_buffer_key);
  size_t bytesreceived = bytes_received();
  for (size_t i = 0;i < receivers.size(); ++i) {
    receivers[i]->shutdown();
    delete receivers[i];
  }
  receivers.clear();
  // shutdown function call handlers
  for (size_t i = 0;i < fcallqueue.size(); ++i) fcallqueue[i].stop_blocking();
  fcallhandlers.join();
  logstream(LOG_INFO) << "Bytes Sent: " << bytessent << std::endl;
  logstream(LOG_INFO) << "Calls Sent: " << calls_sent() << std::endl;
  logstream(LOG_INFO) << "Network Sent: " << network_bytes_sent() << std::endl;
  logstream(LOG_INFO) << "Bytes Received: " << bytesreceived << std::endl;
  logstream(LOG_INFO) << "Calls Received: " << calls_received() << std::endl;
  delete comm;
}

// Decodes one serialized RPC message and invokes its dispatch function.
void distributed_control::exec_function_call(procid_t source,
                                             unsigned char packet_type_mask,
                                             const char* data,
                                             const size_t len) {
  BEGIN_TRACEPOINT(dc_call_dispatch);
  // extract the dispatch function
  iarchive arc(data, len);
  size_t f;
  arc >> f;
  // a
regular funcion call dc_impl::dispatch_type dispatch = (dc_impl::dispatch_type)f; dispatch(*this, source, packet_type_mask, data + arc.off, len - arc.off); if ((packet_type_mask & CONTROL_PACKET) == 0) inc_calls_received(source); END_TRACEPOINT(dc_call_dispatch); } unsigned char distributed_control::get_block_sequentialization_key(fcallqueue_entry& fcallblock) { unsigned char seq_key = 0; char* data = fcallblock.chunk_src; size_t remaininglen = fcallblock.chunk_len; // loop through all the messages while(remaininglen > 0) { ASSERT_GE(remaininglen, sizeof(dc_impl::packet_hdr)); dc_impl::packet_hdr hdr = *reinterpret_cast<dc_impl::packet_hdr*>(data); ASSERT_LE(hdr.len, remaininglen); if (hdr.sequentialization_key != 0) { seq_key = hdr.sequentialization_key; break; } data += sizeof(dc_impl::packet_hdr) + hdr.len; remaininglen -= sizeof(dc_impl::packet_hdr) + hdr.len; } return seq_key; } void distributed_control::deferred_function_call_chunk(char* buf, size_t len, procid_t src) { BEGIN_TRACEPOINT(dc_receive_queuing); fcallqueue_entry* fc = new fcallqueue_entry; fc->chunk_src = buf; fc->chunk_len = len; fc->chunk_ref_counter = NULL; fc->is_chunk = true; fc->source = src; fcallqueue_length.inc(); #ifdef RPC_BLOCK_STRIPING static size_t __idx; // approximate balancing size_t idx = __idx++ % fcallqueue.size(); fcallqueue[idx].enqueue(fc, !fcall_handler_blockers.get(idx)); #else idx = src % fcallqueue.size(); fcallqueue[idx].enqueue(fc, !fcall_handler_blockers.get(idx)); #endif /* if (get_block_sequentialization_key(*fc) > 0) { fcallqueue[src % fcallqueue.size()].enqueue(fc); } else { const uint32_t prod = random::fast_uniform(uint32_t(0), uint32_t(fcallqueue.size() * fcallqueue.size() - 1)); const uint32_t r1 = prod / fcallqueue.size(); const uint32_t r2 = prod % fcallqueue.size(); uint32_t idx = (fcallqueue[r1].size() < fcallqueue[r2].size()) ? 
r1 : r2; fcallqueue[idx].enqueue(fc); } */ // const uint32_t prod = // random::fast_uniform(uint32_t(0), // uint32_t(fcallqueue.size() * fcallqueue.size() - 1)); // const uint32_t r1 = prod / fcallqueue.size(); // const uint32_t r2 = prod % fcallqueue.size(); // uint32_t idx = (fcallqueue[r1].size() < fcallqueue[r2].size()) ? r1 : r2; // fcallqueue[idx].enqueue(fc); END_TRACEPOINT(dc_receive_queuing); } void distributed_control::process_fcall_block(fcallqueue_entry &fcallblock) { if (fcallblock.is_chunk == false) { for (size_t i = 0;i < fcallblock.calls.size(); ++i) { fcallqueue_length.dec(); exec_function_call(fcallblock.source, fcallblock.calls[i].packet_mask, fcallblock.calls[i].data, fcallblock.calls[i].len); } if (fcallblock.chunk_ref_counter != NULL) { if (fcallblock.chunk_ref_counter->dec(fcallblock.calls.size()) == 0) { delete fcallblock.chunk_ref_counter; free(fcallblock.chunk_src); } } } #ifdef RPC_DO_NOT_BREAK_BLOCKS else { fcallqueue_length.dec(); //parse the data in fcallblock.data char* data = fcallblock.chunk_src; size_t remaininglen = fcallblock.chunk_len; //PERMANENT_ACCUMULATE_DIST_EVENT(eventlog, BYTES_EVENT, remaininglen); while(remaininglen > 0) { ASSERT_GE(remaininglen, sizeof(dc_impl::packet_hdr)); dc_impl::packet_hdr hdr = *reinterpret_cast<dc_impl::packet_hdr*>(data); ASSERT_LE(hdr.len, remaininglen); if (!(hdr.packet_type_mask & CONTROL_PACKET)) { global_bytes_received[hdr.src].inc(hdr.len); } exec_function_call(fcallblock.source, hdr.packet_type_mask, data + sizeof(dc_impl::packet_hdr), hdr.len); data += sizeof(dc_impl::packet_hdr) + hdr.len; remaininglen -= sizeof(dc_impl::packet_hdr) + hdr.len; } free(fcallblock.chunk_src); } #else else { fcallqueue_length.dec(); BEGIN_TRACEPOINT(dc_receive_multiplexing); fcallqueue_entry* queuebufs[fcallqueue.size()]; atomic<size_t>* refctr = new atomic<size_t>(0); fcallqueue_entry immediate_queue; immediate_queue.chunk_src = fcallblock.chunk_src; immediate_queue.chunk_ref_counter = refctr; 
immediate_queue.chunk_len = 0; immediate_queue.source = fcallblock.source; immediate_queue.is_chunk = false; for (size_t i = 0;i < fcallqueue.size(); ++i) { queuebufs[i] = new fcallqueue_entry; queuebufs[i]->chunk_src = fcallblock.chunk_src; queuebufs[i]->chunk_ref_counter = refctr; queuebufs[i]->chunk_len = 0; queuebufs[i]->source = fcallblock.source; queuebufs[i]->is_chunk = false; } //parse the data in fcallblock.data char* data = fcallblock.chunk_src; size_t remaininglen = fcallblock.chunk_len; //PERMANENT_ACCUMULATE_DIST_EVENT(eventlog, BYTES_EVENT, remaininglen); size_t stripe = 0; while(remaininglen > 0) { ASSERT_GE(remaininglen, sizeof(dc_impl::packet_hdr)); dc_impl::packet_hdr hdr = *reinterpret_cast<dc_impl::packet_hdr*>(data); ASSERT_LE(hdr.len, remaininglen); refctr->value++; if ((hdr.packet_type_mask & CONTROL_PACKET)) { // control calls are handled immediately with priority. immediate_queue.calls.push_back(function_call_block( data + sizeof(dc_impl::packet_hdr), hdr.len, hdr.packet_type_mask)); } else { global_bytes_received[hdr.src].inc(hdr.len); if (hdr.sequentialization_key == 0) { queuebufs[stripe]->calls.push_back(function_call_block( data + sizeof(dc_impl::packet_hdr), hdr.len, hdr.packet_type_mask)); ++stripe; if (stripe == (fcallblock.source % fcallqueue.size())) ++stripe; if (stripe >= fcallqueue.size()) stripe -= fcallqueue.size(); } else { size_t idx = (hdr.sequentialization_key % (fcallqueue.size())); queuebufs[idx]->calls.push_back(function_call_block( data + sizeof(dc_impl::packet_hdr), hdr.len, hdr.packet_type_mask)); } } data += sizeof(dc_impl::packet_hdr) + hdr.len; remaininglen -= sizeof(dc_impl::packet_hdr) + hdr.len; } END_TRACEPOINT(dc_receive_multiplexing); BEGIN_TRACEPOINT(dc_receive_queuing); for (size_t i = 0;i < fcallqueue.size(); ++i) { if (queuebufs[i]->calls.size() > 0) { fcallqueue_length.inc(queuebufs[i]->calls.size()); fcallqueue[i].enqueue(queuebufs[i]); } else { delete queuebufs[i]; } } 
END_TRACEPOINT(dc_receive_queuing); if (immediate_queue.calls.size() > 0) process_fcall_block(immediate_queue); } #endif } void distributed_control::stop_handler_threads(size_t threadid, size_t total_threadid) { stop_handler_threads_no_wait(threadid, total_threadid); } void distributed_control::stop_handler_threads_no_wait(size_t threadid, size_t total_threadid) { for (size_t i = threadid;i < fcallqueue.size(); i += total_threadid) { fcall_handler_blockers.set_bit(i); } } void distributed_control::start_handler_threads(size_t threadid, size_t total_threadid) { for (size_t i = threadid;i < fcallqueue.size(); i += total_threadid) { fcall_handler_blockers.clear_bit(i); fcallqueue[i].broadcast(); } } void distributed_control::handle_incoming_calls(size_t threadid, size_t total_threadid) { for (size_t i = threadid;i < fcallqueue.size(); i += total_threadid) { if (fcallqueue[i].empty_unsafe() == false) { std::deque<fcallqueue_entry*> q; fcallqueue[i].swap(q); while (!q.empty()) { fcallqueue_entry* entry; entry = q.front(); q.pop_front(); process_fcall_block(*entry); delete entry; } } } } void distributed_control::fcallhandler_loop(size_t id) { // pop an element off the queue // float t = timer::approx_time_seconds(); fcall_handler_active[id].inc(); while(fcallqueue[id].is_alive()) { fcallqueue[id].wait_for_data(); std::deque<fcallqueue_entry*> q; fcallqueue[id].swap(q); while (!q.empty()) { fcallqueue_entry* entry; entry = q.front(); q.pop_front(); process_fcall_block(*entry); delete entry; } // std::cerr << "Handler " << id << " died." 
<< std::endl; } fcall_handler_active[id].dec(); } std::map<std::string, std::string> distributed_control::parse_options(std::string initstring) { std::map<std::string, std::string> options; std::replace(initstring.begin(), initstring.end(), ',', ' '); std::replace(initstring.begin(), initstring.end(), ';', ' '); std::string opt, value; // read till the equal std::stringstream s(initstring); while(s.good()) { getline(s, opt, '='); if (s.bad() || s.eof()) break; getline(s, value, ' '); if (s.bad()) break; options[trim(opt)] = trim(value); } return options; } void distributed_control::init(const std::vector<std::string> &machines, const std::string &initstring, procid_t curmachineid, size_t numhandlerthreads, dc_comm_type commtype) { if (numhandlerthreads == RPC_DEFAULT_NUMHANDLERTHREADS) { // autoconfigure if (thread::cpu_count() > 2) numhandlerthreads = thread::cpu_count() - 2; else numhandlerthreads = 2; } ASSERT_MSG(machines.size() <= RPC_MAX_N_PROCS, "Number of processes exceeded hard limit of %d", RPC_MAX_N_PROCS); // initialize thread local storage if (dc_impl::thrlocal_sequentialization_key_initialized == false) { dc_impl::thrlocal_sequentialization_key_initialized = true; int err = pthread_key_create(&dc_impl::thrlocal_sequentialization_key, NULL); ASSERT_EQ(err, 0); } if (dc_impl::thrlocal_send_buffer_key_initialized == false) { dc_impl::thrlocal_send_buffer_key = true; int err = pthread_key_create(&dc_impl::thrlocal_send_buffer_key, dc_impl::thrlocal_send_buffer_key_deleter); ASSERT_EQ(err, 0); } //-------- Initialize the full barrier --------- full_barrier_in_effect = false; procs_complete.resize(machines.size()); //----------------------------------------------- // initialize the counters global_calls_sent.resize(machines.size()); global_calls_received.resize(machines.size()); global_bytes_received.resize(machines.size()); fcallqueue.resize(numhandlerthreads); // options set_fast_track_requests(true); // parse the initstring 
std::map<std::string,std::string> options = parse_options(initstring); if (commtype == TCP_COMM) { comm = new dc_impl::dc_tcp_comm(); } else { ASSERT_MSG(false, "Unexpected value for comm type"); } for (procid_t i = 0; i < machines.size(); ++i) { receivers.push_back(new dc_impl::dc_stream_receive(this, i)); senders.push_back(new dc_impl::dc_buffered_stream_send2(this, comm, i)); } // create the handler threads // store the threads in the threadgroup fcall_handler_active.resize(numhandlerthreads); fcall_handler_blockers.resize(numhandlerthreads); fcallhandlers.set_stacksize(256*1024); // 256K for (size_t i = 0;i < numhandlerthreads; ++i) { fiber_control::affinity_type affinity; affinity.clear(); affinity.set_bit(i); fcallhandlers.launch(boost::bind(&distributed_control::fcallhandler_loop, this, i), affinity); } // set the local proc values localprocid = curmachineid; localnumprocs = machines.size(); // construct the services distributed_services = new dc_services(*this); // start the machines // improves reliability of initialization #ifdef HAS_MPI if (mpi_tools::initialized()) MPI_Barrier(MPI_COMM_WORLD); #endif comm->init(machines, options, curmachineid, receivers, senders); logstream(LOG_INFO) << "TCP Communication layer constructed." << std::endl; if (localprocid == 0) { logstream(LOG_EMPH) << "Cluster of " << machines.size() << " instances created." 
<< std::endl; // check for duplicate IP addresses std::map<std::string, size_t> ipaddresses; for (size_t i = 0; i < machines.size(); ++i ){ size_t pos = machines[i].find(":"); ASSERT_NE(pos, std::string::npos); std::string address = machines[i].substr(0, pos); ipaddresses[address]++; } bool hasduplicate = false; std::map<std::string, size_t>::const_iterator iter = ipaddresses.begin(); while (iter != ipaddresses.end()) { if (iter->second > 1) { hasduplicate = true; logstream(LOG_WARNING) << "Duplicate IP address: " << iter->first << std::endl; } ++iter; } if (hasduplicate) { logstream(LOG_WARNING) << "For maximum performance, GraphLab strongly prefers running just one process per machine." << std::endl; } } // improves reliability of initialization #ifdef HAS_MPI if (mpi_tools::initialized()) MPI_Barrier(MPI_COMM_WORLD); #endif // set the value of the last_dc for the get_instance function last_dc = this; // set the static variable for the get_instance_procid() function last_dc_procid = localprocid; barrier(); // initialize the empty stream nullstrm.open(boost::iostreams::null_sink()); // initialize the event log INITIALIZE_EVENT_LOG(*this); ADD_CUMULATIVE_CALLBACK_EVENT(EVENT_NETWORK_BYTES, "Network Utilization", "MB", boost::bind(&distributed_control::network_megabytes_sent, this)); ADD_CUMULATIVE_CALLBACK_EVENT(EVENT_RPC_CALLS, "RPC Calls", "Calls", boost::bind(&distributed_control::calls_sent, this)); } void distributed_control::barrier() { distributed_services->barrier(); } void distributed_control::flush() { for (procid_t i = 0;i < senders.size(); ++i) { senders[i]->flush(); } } void distributed_control::flush(procid_t p) { senders[p]->flush(); } void distributed_control::flush_soon() { for (procid_t i = 0;i < senders.size(); ++i) { senders[i]->flush_soon(); } } void distributed_control::flush_soon(procid_t p) { senders[p]->flush_soon(); } /***************************************************************************** Implementation of Full Barrier 
*****************************************************************************/
/* It is unfortunate but this is copy-paste code from dc_dist_object.hpp.
  I thought for a long time about how to implement this without copy-pasting
  and I can't think of a simple enough solution.

  Part of the issue is that the "context" concept was not built into the
  RPC system to begin with and is currently folded in through the
  dc_dist_object system. As a result, the global context becomes very hard
  to define properly. Including a dc_dist_object as a member only resolves
  the high level contexts such as barrier, broadcast, etc which do not
  require intrusive access into deeper information about the context. The
  full barrier however, requires deep information about the context which
  cannot be resolved easily.
*/

/**
 * This barrier ensures globally across all machines that all calls issued
 * prior to this barrier are completed before returning. This function
 * could return prematurely if other threads are still issuing function
 * calls since we cannot differentiate between calls issued before the
 * barrier and calls issued while the barrier is being evaluated.
 */
void distributed_control::full_barrier() {
  // gather a sum of all the calls issued to machine 0
  std::vector<size_t> calls_sent_to_target(numprocs(), 0);
  for (size_t i = 0;i < numprocs(); ++i) {
    calls_sent_to_target[i] = global_calls_sent[i].value;
  }

  // tell node 0 how many calls there are
  std::vector<std::vector<size_t> > all_calls_sent(numprocs());
  all_calls_sent[procid()] = calls_sent_to_target;
  all_gather(all_calls_sent, true);

  // get the number of calls I am supposed to receive from each machine
  calls_to_receive.clear();
  calls_to_receive.resize(numprocs(), 0);
  for (size_t i = 0;i < numprocs(); ++i) {
    calls_to_receive[i] += all_calls_sent[i][procid()];
    //    std::cout << "Expecting " << calls_to_receive[i] << " calls from " << i << std::endl;
  }
  // clear the counters
  num_proc_recvs_incomplete.value = numprocs();
  procs_complete.clear();
  // activate the full barrier
  full_barrier_in_effect = true;
  // NOTE(review): x86-only store fence; ensures full_barrier_in_effect is
  // visible to receiver threads before the completion scan below — this
  // will not compile on non-x86 targets, confirm portability requirements.
  __asm("mfence");
  // begin one pass to set all which are already completed
  // (the receive path performs the same set_bit/dec when a machine's
  // last expected call arrives; set_bit returning false means we won the
  // race to mark it complete)
  for (procid_t i = 0;i < numprocs(); ++i) {
    if (global_calls_received[i].value >= calls_to_receive[i]) {
      if (procs_complete.set_bit(i) == false) {
        num_proc_recvs_incomplete.dec();
      }
    }
  }
  // wait until every machine's expected call count has been received
  full_barrier_lock.lock();
  while (num_proc_recvs_incomplete.value > 0) full_barrier_cond.wait(full_barrier_lock);
  full_barrier_lock.unlock();
  full_barrier_in_effect = false;
  barrier();
  //  for (size_t i = 0; i < numprocs(); ++i) {
  //    std::cout << "Received " << global_calls_received[i].value << " from " << i << std::endl;
  //  }
}

} //namespace graphlab

================================================
FILE: src/graphlab/rpc/dc.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_DC_HPP #define GRAPHLAB_DC_HPP #include <iostream> #include <boost/iostreams/stream.hpp> #include <boost/function.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/parallel/fiber_group.hpp> #include <graphlab/parallel/fiber_conditional.hpp> #include <graphlab/util/resizing_array_sink.hpp> #include <graphlab/util/fiber_blocking_queue.hpp> #include <graphlab/util/dense_bitset.hpp> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/dc_receive.hpp> #include <graphlab/rpc/dc_send.hpp> #include <graphlab/rpc/dc_comm_base.hpp> #include <graphlab/rpc/dc_dist_object_base.hpp> #include <graphlab/rpc/is_rpc_call.hpp> #include <graphlab/rpc/function_call_issue.hpp> #include <graphlab/rpc/function_broadcast_issue.hpp> #include <graphlab/rpc/request_issue.hpp> #include <graphlab/rpc/request_reply_handler.hpp> #include <graphlab/rpc/function_ret_type.hpp> #include <graphlab/rpc/dc_compile_parameters.hpp> #include <graphlab/rpc/thread_local_send_buffer.hpp> #include <graphlab/util/tracepoint.hpp> #include <graphlab/rpc/distributed_event_log.hpp> #include <boost/preprocessor.hpp> #include <graphlab/rpc/function_arg_types_def.hpp> namespace graphlab { /** * \ingroup rpc * \brief Distributed control constructor parameters. 
*
 * Provides the communication layer with a list of ip addresses and
 * port numbers which enumerate all the machines to establish connections
 * with.
 *
 * You should not need to do this. The default constructor in
 * graphlab::distributed_control does it for you.
 * See \ref RPC for usage details.
 */
struct dc_init_param{
  /** A vector containing a list of hostnames/ipaddresses and port numbers
   * of all machines participating in this RPC program.
   * for instance:
   * \code
   * machines.push_back("127.0.0.1:10000");
   * machines.push_back("127.0.0.1:10001");
   * \endcode
   */
  std::vector<std::string> machines;

  /** Additional construction options of the form "key1=value1,key2=value2".
      There are no available options at this time.

      Internal options which should not be used
      \li \b __socket__=NUMBER Forces TCP comm to use this socket number for
          its listening socket instead of creating a new one. The socket must
          already be bound to the listening port.
  */
  std::string initstring;

  /** The index of this machine into the machines vector */
  procid_t curmachineid;
  /** Number of background RPC handling threads to create */
  size_t numhandlerthreads;
  /** The communication method. */
  dc_comm_type commtype;

  /**
   * Constructs a dc_init_param object.
   * \param numhandlerthreads Optional Argument. The number of handler
   *                          threads to create. Defaults to
   *                          \ref RPC_DEFAULT_NUMHANDLERTHREADS
   * \param commtype The Communication type. The only accepted value now is
   *                 TCP_COMM
   */
  dc_init_param(size_t numhandlerthreads = RPC_DEFAULT_NUMHANDLERTHREADS,
                dc_comm_type commtype = RPC_DEFAULT_COMMTYPE):
      numhandlerthreads(numhandlerthreads), commtype(commtype) {
  }
};

// forward declarations
class dc_services;

namespace dc_impl {
  class dc_buffered_stream_send2;
  class dc_stream_receive;
}

/**
 * \ingroup rpc
 * \brief The distributed control object is the primary means of communication
 * between the distributed GraphLab processes.
*
 * The distributed_control object provides asynchronous, multi-threaded
 * Remote Procedure Call (RPC) services to allow distributed GraphLab
 * processes to communicate with each other. Currently, the only
 * communication method implemented is TCP/IP.
 * There are several ways of setting up the communication layer, but the most
 * reliable, and the preferred method, is to "bootstrap" using MPI. See your
 * local MPI documentation for details on how to launch MPI jobs.
 *
 * To construct a distributed_control object, the simplest method is to just
 * invoke the default constructor.
 *
 * \code
 * // initialize MPI
 * mpi_tools::init(argc, argv);
 * // construct distributed control object
 * graphlab::distributed_control dc;
 * \endcode
 *
 * After which all distributed control services will operate correctly.
 *
 * Each process is assigned a sequential process ID starting at 0.
 * i.e. The first process will have a process ID of 0, the second process
 * will have an ID of 1, etc. distributed_control::procid() can be used to
 * obtain the current machine's process ID, and distributed_control::numprocs()
 * can be used to obtain the total number of processes.
 *
 * The primary functions used to communicate between processes are
 * distributed_control::remote_call() and
 * distributed_control::remote_request(). These functions are thread-safe and
 * can be called very rapidly as they only write into a local buffer.
 * Communication is handled by a background thread. On the remote side,
 * RPC calls are handled in parallel by a thread pool, and thus may be
 * parallelized arbitrarily. Operations such as
 * distributed_control::full_barrier(), or the sequentialization key
 * can be used to get finer grained control over order of execution on the
 * remote machine.
 *
 * A few other additional helper functions are also provided to support
 * "synchronous" modes of communication. These functions are not thread-safe
 * and can only be called on one thread per machine.
These functions block
 * until all machines call the same function. For instance, if gather() is
 * called on one machine, it will not return until all machines call gather().
 *
 * \li distributed_control::barrier()
 * \li distributed_control::full_barrier()
 * \li distributed_control::broadcast()
 * \li distributed_control::all_reduce()
 * \li distributed_control::all_reduce2()
 * \li distributed_control::gather()
 * \li distributed_control::all_gather()
 *
 * \note These synchronous operations are modeled after some MPI collective
 * operations. However, these operations here are not particularly optimized
 * and will generally be slower than their MPI counterparts. However, the
 * implementations here are much easier to use, relying extensively on
 * serialization to simplify communication.
 *
 * To support Object Oriented Programming like methodologies, we allow the
 * creation of <b>Distributed Objects</b> through graphlab::dc_dist_object.
 * dc_dist_object allows a class to construct its own local copy of
 * a distributed_control object allowing instances of the class to communicate
 * with each other across the network.
 *
 * See \ref RPC for usage examples.
 */
class distributed_control{
  public:
    /** \internal
     * Each element of the function call queue is a data/len pair
     */
    struct function_call_block{
      // default constructor leaves all members uninitialized
      function_call_block() {}

      function_call_block(char* data, size_t len,
                          unsigned char packet_mask):
          data(data), len(len), packet_mask(packet_mask){}

      char* data;                  // serialized call data
      size_t len;                  // length of data in bytes
      unsigned char packet_mask;   // packet type flags for this call
    };
  private:
    /// initialize receiver threads. private form of the constructor
    void init(const std::vector<std::string> &machines,
              const std::string &initstring,
              procid_t curmachineid,
              size_t numhandlerthreads,
              dc_comm_type commtype = RPC_DEFAULT_COMMTYPE);

    /// a pointer to the communications subsystem
    dc_impl::dc_comm_base* comm;

    /// senders and receivers to all machines
    std::vector<dc_impl::dc_receive*> receivers;
    std::vector<dc_impl::dc_send*> senders;

    /// A thread group of function call handlers
    fiber_group fcallhandlers;
    std::vector<atomic<size_t> > fcall_handler_active;
    dense_bitset fcall_handler_blockers;

    /// One entry of the function call queue: either a set of parsed calls,
    /// or (when is_chunk is true) a still-serialized chunk from one peer.
    struct fcallqueue_entry {
      std::vector<function_call_block> calls;
      char* chunk_src;                    // backing buffer; presumably what the calls point into — TODO confirm
      size_t chunk_len;
      atomic<size_t>* chunk_ref_counter;  // shared refcount for chunk_src
      procid_t source;                    // machine this entry came from
      bool is_chunk;                      // true if still an unparsed chunk
    };
    /// a queue of functions to be executed
    std::vector<fiber_blocking_queue<fcallqueue_entry*> > fcallqueue;

    // number of blocks waiting to be deserialized + the number of
    // incomplete function calls
    atomic<size_t> fcallqueue_length;

    /// object registrations;
    std::vector<void*> registered_objects;
    std::vector<dc_impl::dc_dist_object_base*> registered_rmi_instance;

    /// For convenience, we provide an instance of dc_services
    dc_services* distributed_services;

    /// ID of the local machine
    procid_t localprocid;

    /// Number of machines
    procid_t localnumprocs;

    // per-peer RPC statistics, indexed by procid
    std::vector<atomic<size_t> > global_calls_sent;
    std::vector<atomic<size_t> > global_calls_received;
    std::vector<atomic<size_t> > global_bytes_received;

    // callbacks run on deletion; see register_deletion_callback()
    std::vector<boost::function<void(void)> > deletion_callbacks;

    template <typename T> friend class dc_dist_object;
    friend class dc_impl::dc_stream_receive;
    friend class dc_impl::dc_buffered_stream_send2;
    friend struct dc_impl::thread_local_buffer;

    /// disable the operator= by placing it in private
    distributed_control& operator=(const distributed_control& dc) { return *this; }

    std::map<std::string, std::string> parse_options(std::string initstring);

    // NOTE(review): "volatile" here qualifies the return value and has no
    // effect on synchronization; callers spin on this instead
    // (see get_registered_object()).
    volatile inline size_t num_registered_objects() {
      return registered_objects.size();
    }
DECLARE_TRACER(dc_receive_queuing);
    DECLARE_TRACER(dc_receive_multiplexing);
    DECLARE_TRACER(dc_call_dispatch);

    DECLARE_EVENT(EVENT_NETWORK_BYTES);
    DECLARE_EVENT(EVENT_RPC_CALLS);

  public:
    /**
     * Default constructor. Automatically tries to read the initialization
     * from environment variables, or from MPI (if MPI is initialized).
     */
    distributed_control();

    /**
     * Passes custom constructed initialization parameters in
     * \ref dc_init_param
     *
     * Though dc_init_param can be obtained from environment variables using
     * dc_init_from_env() or from MPI using dc_init_from_mpi(),
     * using the default constructor is preferred.
     */
    explicit distributed_control(dc_init_param initparam);

    ~distributed_control();

    // The procid of the last distributed_control object created
    // this is quite legacy stuff when we technically permitted multiple DC
    // objects. Now, a lot of the system is built around the assumption
    // of a singleton DC.
    static procid_t last_dc_procid;

    // a pointer to the last distributed_control object created
    // this is quite legacy stuff when we technically permitted multiple DC
    // objects. Now, a lot of the system is built around the assumption
    // of a singleton DC.
    static distributed_control* last_dc;

    /**
     * Gets the procid of the last distributed_control instance created.
     * If there is no distributed_control instance, this returns 0.
     * For instance, this returns the current machine's procid if there is only
     * one distributed_control.
     */
    static procid_t get_instance_procid();

    /// Number of handler thread groups (the size of the call queue vector).
    inline size_t num_handler_threads() const {
      return fcallqueue.size();
    }

    /**
     * Gets a pointer to the last distributed_control instance created.
     * If there is no distributed_control instance, this returns NULL.
     */
    static distributed_control* get_instance();

    /// returns the id of the current process
    inline procid_t procid() const {
      return localprocid;
    }

    /// returns the number of processes in total.
    inline procid_t numprocs() const {
      return localnumprocs;
    }

    // If true, all request messages are fast tracked
    // (see fast_track_requests()).
    // NOTE(review): public and toggled without synchronization.
    bool use_fast_track_requests;

    /// Sets the fast track status, returning the previous value
    bool set_fast_track_requests(bool val) {
      bool ret = use_fast_track_requests;
      use_fast_track_requests = val;
      return ret;
    }

    /// Returns true if we should fast track all request messages
    bool fast_track_requests() {
      return use_fast_track_requests;
    }

    /**
     * \brief Registers a callback which will be called on deletion of the
     * distributed_control object.
     *
     * This function is useful for distributed static variables which may
     * be only be deleted after main().
     */
    void register_deletion_callback(boost::function<void(void)> deleter) {
      deletion_callbacks.push_back(deleter);
    }

    /**
     \brief Sets the sequentialization key to a new value, returning
     the previous value.

     All RPC calls made using the same key value (as long as the key is
     non-zero) will sequentialize. RPC calls made while the key value is 0
     can be run in parallel in arbitrary order.
     \code
     oldval = distributed_control::set_sequentialization_key(new_key);
     // ...
     // ... do stuff
     // ...
     set_sequentialization_key(oldval);
     \endcode
     The key value is <b>thread-local</b> thus setting the key value in one
     thread does not affect the key value in another thread.
     */
    static unsigned char set_sequentialization_key(unsigned char newkey);

    /**
     \brief Creates a new sequentialization key, returning the old value.

     All RPC calls made using the same key value (as long as the key is
     non-zero) will sequentialize. RPC calls made while the key value is 0
     can be run in parallel in arbitrary order.

     However, since new_sequentialization_key() uses a very naive key
     selection system, we recommend the use of set_sequentialization_key().

     Usage:
     \code
     oldval = distributed_control::new_sequentialization_key();
     // ...
     // ... do stuff
     // ...
set_sequentialization_key(oldval);
     \endcode
     The key value is <b>thread-local</b> thus setting the key value in one
     thread does not affect the key value in another thread.
     */
    static unsigned char new_sequentialization_key();

    /** \brief gets the current sequentialization key. This function is not
     * generally useful.
     */
    static unsigned char get_sequentialization_key();

    /*
     * The key RPC communication functions are all macro generated
     * and doxygen does not like them so much.
     * Here, we will block all of them out
     * and have another set of "fake" functions later on which are wrapped
     * with a #if 0 so C++ will ignore them.
     */
    /// \cond GRAPHLAB_INTERNAL

    /*
    This generates the interface functions for the standard calls, basic calls

    The generated code looks like this:

    template<typename F , typename T0>
    void remote_call (procid_t target, F remote_function , const T0 &i0 ) {
      ASSERT_LT(target, senders.size());
      dc_impl::remote_call_issue1 <F , T0>
        ::exec(senders[target], STANDARD_CALL, target, remote_function , i0 );
    }

    The arguments passed to the RPC_INTERFACE_GENERATOR ARE:
    (interface name, issue processor name, flags)
    */
    // Helper generators: GENARGS emits "const TN &iN" parameters, GENI emits
    // the bare argument names, GENT the type names, GENARC serializes each
    // argument into an archive.
    #define GENARGS(Z,N,_) BOOST_PP_CAT(const T, N) BOOST_PP_CAT(&i, N)
    #define GENI(Z,N,_) BOOST_PP_CAT(i, N)
    #define GENT(Z,N,_) BOOST_PP_CAT(T, N)
    #define GENARC(Z,N,_) arc << BOOST_PP_CAT(i, N);

    // Expands (once per arity N) into a call-style member function that
    // forwards to the issue object named in the 3-tuple.
    #define RPC_INTERFACE_GENERATOR(Z,N,FNAME_AND_CALL) \
    template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
    void BOOST_PP_TUPLE_ELEM(3,0,FNAME_AND_CALL) (procid_t target, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
      ASSERT_LT(target, senders.size()); \
      BOOST_PP_CAT( BOOST_PP_TUPLE_ELEM(3,1,FNAME_AND_CALL),N) \
        <F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T)> \
        ::exec(senders[target], BOOST_PP_TUPLE_ELEM(3,2,FNAME_AND_CALL), target, remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
    } \

    /*
    Generates the interface functions. 3rd argument is a tuple
    (interface name, issue name, flags)
    */
    BOOST_PP_REPEAT(6, RPC_INTERFACE_GENERATOR, (remote_call, dc_impl::remote_call_issue, STANDARD_CALL) )
    BOOST_PP_REPEAT(6, RPC_INTERFACE_GENERATOR, (reply_remote_call,dc_impl::remote_call_issue, STANDARD_CALL | FLUSH_PACKET) )
    BOOST_PP_REPEAT(6, RPC_INTERFACE_GENERATOR, (control_call, dc_impl::remote_call_issue, (STANDARD_CALL | CONTROL_PACKET)) )

    // Like RPC_INTERFACE_GENERATOR, but the generated function targets an
    // iterator range of machines instead of a single procid.
    #define BROADCAST_INTERFACE_GENERATOR(Z,N,FNAME_AND_CALL) \
    template<typename Iterator, typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
    void BOOST_PP_TUPLE_ELEM(3,0,FNAME_AND_CALL) (Iterator target_begin, Iterator target_end, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
      if (target_begin == target_end) return; \
      BOOST_PP_CAT( BOOST_PP_TUPLE_ELEM(3,1,FNAME_AND_CALL),N) \
        <Iterator, F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T)> \
        ::exec(senders, BOOST_PP_TUPLE_ELEM(3,2,FNAME_AND_CALL), target_begin, target_end, remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
    } \

    BOOST_PP_REPEAT(6, BROADCAST_INTERFACE_GENERATOR, (remote_call, dc_impl::remote_broadcast_issue, STANDARD_CALL) )

    // Request variant taking an explicit reply handle and packet flags.
    #define CUSTOM_REQUEST_INTERFACE_GENERATOR(Z,N,ARGS) \
    template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
    BOOST_PP_TUPLE_ELEM(2,0,ARGS) (procid_t target, size_t handle, unsigned char flags, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
      BOOST_PP_CAT( BOOST_PP_TUPLE_ELEM(2,1,ARGS),N) \
        <F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T)> \
        ::exec(senders[target], handle, flags, target, remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
    }

    // Non-blocking request: issues the call and returns a request_future
    // the caller can wait on later.
    #define FUTURE_REQUEST_INTERFACE_GENERATOR(Z,N,ARGS) \
    template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
    BOOST_PP_TUPLE_ELEM(2,0,ARGS) (procid_t target, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
      ASSERT_LT(target, senders.size()); \
      request_future<__GLRPC_FRESULT> reply; \
      custom_remote_request(target, reply.get_handle(), BOOST_PP_TUPLE_ELEM(2,1,ARGS), remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
      return reply; \
    }

    // Blocking request: issues the call, then immediately waits on the
    // future (reply() blocks) and returns the result.
    #define REQUEST_INTERFACE_GENERATOR(Z,N,ARGS) \
    template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
    BOOST_PP_TUPLE_ELEM(2,0,ARGS) (procid_t target, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
      request_future<__GLRPC_FRESULT> reply; \
      custom_remote_request(target, reply.get_handle(), BOOST_PP_TUPLE_ELEM(2,1,ARGS), remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
      return reply(); \
    }

    /*
    Generates the interface functions. 3rd argument is a tuple
    (interface name, issue name, flags)
    */
    BOOST_PP_REPEAT(7, CUSTOM_REQUEST_INTERFACE_GENERATOR, (void custom_remote_request, dc_impl::remote_request_issue) )
    BOOST_PP_REPEAT(7, REQUEST_INTERFACE_GENERATOR, (typename dc_impl::function_ret_type<__GLRPC_FRESULT>::type remote_request, (STANDARD_CALL | FLUSH_PACKET)) )
    BOOST_PP_REPEAT(7, FUTURE_REQUEST_INTERFACE_GENERATOR, (request_future<__GLRPC_FRESULT> future_remote_request, (STANDARD_CALL)) )

    #undef RPC_INTERFACE_GENERATOR
    #undef BROADCAST_INTERFACE_GENERATOR
    #undef REQUEST_INTERFACE_GENERATOR
    #undef FUTURE_REQUEST_INTERFACE_GENERATOR
    #undef CUSTOM_REQUEST_INTERFACE_GENERATOR
    #undef GENARC
    #undef GENT
    #undef GENI
    #undef GENARGS
    /// \endcond

    /*************************************************************************
     *           Here begins the Doxygen fake functions block                *
     *************************************************************************/

#if DOXYGEN_DOCUMENTATION
    /**
     * \brief Performs a non-blocking RPC call to the target machine
     * to run the provided function pointer.
     *
     * remote_call() calls the function "fn" on a target remote machine.
     * Provided arguments are serialized and sent to the target.
     * Therefore, all arguments are necessarily transmitted by value.
* If the target function has a return value, the return value is lost. * * remote_call() is non-blocking and does not wait for the target machine * to complete execution of the function. Different remote_calls may be handled * by different threads on the target machine and thus the target function * should be made thread-safe. * Alternatively, see set_sequentialization_key() * to force sequentialization of groups of remote calls. * * If blocking operation is desired, remote_request() may be used. * Alternatively, a full_barrier() may also be used to wait for completion of * all incomplete RPC calls. * * Example: * \code * // A print function is defined * void print(std::string s) { * std::cout << s << "\n"; * } * * ... ... * // call the print function on machine 1 to print "hello" * dc.remote_call(1, print, "hello"); * \endcode * * * * \param targetmachine The ID of the machine to run the function on * \param fn The function to run on the target machine * \param ... The arguments to send to Fn. Arguments must be serializable. * and must be castable to the target types. */ void remote_call(procid_t targetmachine, Fn fn, ...); /** * \brief Performs a non-blocking RPC call to a collection of machines * to run the provided function pointer. * * This function calls the provided function pointer on a collection of * machines contained in the iterator range [begin, end). * Provided arguments are serialized and sent to the target. * Therefore, all arguments are necessarily transmitted by value. * If the target function has a return value, the return value is lost. * * This function is functionally equivalent to: * * \code * while(machine_begin != machine_end) { * remote_call(*machine_begin, fn, ...); * ++machine_begin; * } * \endcode * * However, this function makes some optimizations to ensure all arguments * are only serialized once instead of \#calls times. 
* * This function is non-blocking and does not wait for the target machines * to complete execution of the function. Different remote_calls may be handled * by different threads on the target machines and thus the target function * should be made thread-safe. Alternatively, see set_sequentialization_key() * to force sequentialization of groups of remote_calls. A full_barrier() * may also be issued to wait for completion of all RPC calls issued prior * to the full barrier. * * Example: * \code * // A print function is defined * void print(std::string s) { * std::cout << s << "\n"; * } * * ... ... * // call the print function on machine 1, 3 and 5 to print "hello" * std::vector<procid_t> procs; * procs.push_back(1); procs.push_back(3); procs.push_back(5); * dc.remote_call(procs.begin(), procs.end(), print, "hello"); * \endcode * * * \param machine_begin The beginning of an iterator range containing a list * machines to call. Iterator::value_type must be * castable to procid_t. * \param machine_end The end of an iterator range containing a list * machines to call. Iterator::value_type must be * castable to procid_t. * \param fn The function to run on the target machine * \param ... The arguments to send to Fn. Arguments must be serializable. * and must be castable to the target types. */ void remote_call(Iterator machine_begin, Iterator machine_end, Fn fn, ...); /** * \brief Performs a blocking RPC call to the target machine * to run the provided function pointer. * * remote_request() calls the function "fn" on a target remote machine. Provided * arguments are serialized and sent to the target. * Therefore, all arguments are necessarily transmitted by value. * If the target function has a return value, it is sent back to calling * machine. * * Unlike remote_call(), remote_request() is blocking and waits for the target * machine to complete execution of the function. 
However, different
 * remote_requests may still be handled by different threads on the target
 * machine.
 *
 * Example:
 * \code
 * // A print function is defined
 * int add_one(int i) {
 *   return i + 1;
 * }
 *
 * ... ...
 * // call the add_one function on machine 1
 * int i = 10;
 * i = dc.remote_request(1, add_one, i);
 * // i will now be 11
 * \endcode
 *
 * \see graphlab::fiber_remote_request
 *      graphlab::distributed_control::future_remote_request
 *
 * \param targetmachine The ID of the machine to run the function on
 * \param fn The function to run on the target machine
 * \param ... The arguments to send to Fn. Arguments must be serializable.
 *            and must be castable to the target types.
 *
 * \returns Returns the same return type as the function fn
 */
RetVal remote_request(procid_t targetmachine, Fn fn, ...);


/**
 * \brief Performs a non-blocking RPC call to the target machine
 * to run the provided function pointer.
 *
 * future_remote_request() calls the function "fn" on a target remote machine.
 * Provided arguments are serialized and sent to the target.
 * Therefore, all arguments are necessarily transmitted by value.
 * If the target function has a return value, it is sent back to calling
 * machine.
 *
 * future_remote_request() is like remote_request(), but is non-blocking.
 * Instead, it returns immediately a \ref graphlab::request_future object
 * which will allow you wait for the return value.
 *
 * Example:
 * \code
 * // A print function is defined
 * int add_one(int i) {
 *   return i + 1;
 * }
 *
 * ... ...
 * // call the add_one function on machine 1
 * int i = 10;
 * graphlab::request_future<int> ret = dc.future_remote_request(1, add_one, i);
 * int result = ret();
 * // result will be 11
 * \endcode
 *
 * \see graphlab::fiber_remote_request
 *      graphlab::distributed_control::remote_request
 *
 * \param targetmachine The ID of the machine to run the function on
 * \param fn The function to run on the target machine
 * \param ... The arguments to send to Fn. Arguments must be serializable.
* and must be castable to the target types.
 *
 * \returns Returns the same return type as the function fn
 */
request_future<RetVal> future_remote_request(procid_t targetmachine, Fn fn, ...);

#endif
    /*************************************************************************
     *             Here end the Doxygen fake functions block                 *
     *************************************************************************/

  private:
    /**
     * \internal
     * Immediately calls the function described by the data
     * inside the buffer. This should not be called directly.
     */
    void exec_function_call(procid_t source, unsigned char packet_type_mask,
                            const char* data, const size_t len);

    /**
     * \internal
     * Called by handler threads to process the function call block
     */
    void process_fcall_block(fcallqueue_entry &fcallblock);

    /**
     * \internal
     * Receive a collection of serialized function calls.
     * This function will take ownership of the pointer
     */
    void deferred_function_call_chunk(char* buf, size_t len, procid_t src);

    /**
     * \internal
     * Gets the sequentialization key of a block if any.
     */
    unsigned char get_block_sequentialization_key(fcallqueue_entry& fcallblock);

    /**
     * \internal This is called by the function handler threads
     */
    void fcallhandler_loop(size_t id);

  public:
    /// \cond GRAPHLAB_INTERNAL
    /**
     * \internal
     * Stops one group of handler threads and waits for them to complete.
     * May be used to allow external threads to take over RPC processing.
     *
     * \param threadid Group number to stop
     * \param total_threadid Number of groups
     */
    void stop_handler_threads(size_t threadid, size_t total_threadid);

    /**
     * \internal
     * Stops one group of handler threads and returns immediately without
     * waiting for them to complete.
     * May be used to allow external threads to take over RPC processing.
     *
     * \param threadid Group number to stop
     * \param total_threadid Number of groups
     */
    void stop_handler_threads_no_wait(size_t threadid, size_t total_threadid);

    /**
     * \internal
     * Performs RPC processing for a group of threads in lieu of the built-in
     * RPC threads. The group must be stopped before using stop_handler_threads
     *
     * \param threadid Group number to handle
     * \param total_threadid Number of groups
     */
    void handle_incoming_calls(size_t threadid, size_t total_threadid);

    /**
     * \internal
     * Restarts internal RPC threads for a group.
     * The group must be stopped before using stop_handler_threads
     *
     * \param threadid Group number to restart
     * \param total_threadid Number of groups
     */
    void start_handler_threads(size_t threadid, size_t total_threadid);

    /// \internal
    size_t recv_queue_length() const {
      return fcallqueue_length.value;
    }

    /// \internal
    size_t send_queue_length() const {
      return comm->send_queue_length();
    }
    /// \endcond

  private:
    /// Counts one call sent to procid.
    inline void inc_calls_sent(procid_t procid) {
      //PERMANENT_ACCUMULATE_DIST_EVENT(eventlog, CALLS_EVENT, 1);
      global_calls_sent[procid].inc();
    }

    /// Counts one call received from procid. If a full_barrier() is in
    /// progress, also checks whether all expected calls from procid have
    /// now arrived, and wakes the barrier when the last process completes.
    inline void inc_calls_received(procid_t procid) {
      if (!full_barrier_in_effect) {
        size_t t = global_calls_received[procid].inc();
        // a full barrier may have been activated concurrently between the
        // test above and the increment; re-check so completion is not missed
        if (full_barrier_in_effect) {
          if (t == calls_to_receive[procid]) {
            // if it was me who set the bit
            if (procs_complete.set_bit(procid) == false) {
              // then decrement the incomplete count.
              // if it was me to decreased it to 0
              // lock and signal
              full_barrier_lock.lock();
              if (num_proc_recvs_incomplete.dec() == 0) {
                full_barrier_cond.signal();
              }
              full_barrier_lock.unlock();
            }
          }
        }
      }
      else {
        //check the proc I just incremented.
        // If I just exceeded the required size, I need
        // to decrement the full barrier counter
        if (global_calls_received[procid].inc() == calls_to_receive[procid]) {
          // if it was me who set the bit
          if (procs_complete.set_bit(procid) == false) {
            // then decrement the incomplete count.
            // if it was me to decreased it to 0
            // lock and signal
            full_barrier_lock.lock();
            if (num_proc_recvs_incomplete.dec() == 0) {
              full_barrier_cond.signal();
            }
            full_barrier_lock.unlock();
          }
        }
      }
    }

  public:
    /// \brief Returns the total number of RPC calls made
    inline size_t calls_sent() const {
      size_t ctr = 0;
      for (size_t i = 0;i < numprocs(); ++i) {
        ctr += global_calls_sent[i].value;
      }
      return ctr;
    }

    /// \brief Returns the total number of RPC calls made in millions
    /// (here 1 million = 1024 * 1024, matching the byte counters)
    inline double mega_calls_sent() const {
      size_t ctr = 0;
      for (size_t i = 0;i < numprocs(); ++i) {
        ctr += global_calls_sent[i].value;
      }
      return double(ctr)/(1024 * 1024);
    }

    /// \brief Returns the total number of RPC calls received
    inline size_t calls_received() const {
      size_t ctr = 0;
      for (size_t i = 0;i < numprocs(); ++i) {
        ctr += global_calls_received[i].value;
      }
      return ctr;
    }

    /** \brief Returns the total number of bytes sent excluding headers and
     * other control overhead. Also see network_bytes_sent()
     */
    inline size_t bytes_sent() const {
      size_t ret = 0;
      for (size_t i = 0;i < senders.size(); ++i) ret += senders[i]->bytes_sent();
      return ret;
    }

    /** \brief Returns the total number of bytes sent including all headers
     * and other control overhead. Also see bytes_sent()
     */
    inline size_t network_bytes_sent() const {
      return comm->network_bytes_sent();
    }

    /** \brief Returns the total number of megabytes sent including all headers
     * and other control overhead. Also see network_bytes_sent()
     */
    inline double network_megabytes_sent() const {
      return double(comm->network_bytes_sent()) / (1024 * 1024);
    }

    /** \brief Returns the total number of bytes received excluding all headers
     * and other control overhead. Also see bytes_sent().
*/
    inline size_t bytes_received() const {
      size_t ret = 0;
      for (size_t i = 0;i < global_bytes_received.size(); ++i) {
        ret += global_bytes_received[i].value;
      }
      return ret;
    }

    /// \cond GRAPHLAB_INTERNAL
    /// \internal
    /// Registers the object (and its owning RMI instance) and returns its id.
    inline size_t register_object(void* v, dc_impl::dc_dist_object_base *rmiinstance) {
      ASSERT_NE(v, (void*)NULL);
      registered_objects.push_back(v);
      registered_rmi_instance.push_back(rmiinstance);
      return registered_objects.size() - 1;
    }

    /// \internal
    /// Looks up a registered object by id, spinning until the id has been
    /// registered and its slot is non-NULL (registration may still be in
    /// flight when a call referencing the object arrives).
    inline void* get_registered_object(size_t id) {
      while(__builtin_expect((id >= num_registered_objects()), 0)) sched_yield();
      while (__builtin_expect(registered_objects[id] == NULL, 0)) sched_yield();
      return registered_objects[id];
    }

    /// \internal
    /// Looks up the RMI instance associated with an object id; spins until
    /// the id has been registered.
    inline dc_impl::dc_dist_object_base* get_rmi_instance(size_t id) {
      while(id >= num_registered_objects()) sched_yield();
      ASSERT_NE(registered_rmi_instance[id], (void*)NULL);
      return registered_rmi_instance[id];
    }

    /// \internal
    /// Clears a registration slot (the slot is not reused here).
    inline void clear_registered_object(size_t id) {
      registered_objects[id] = (void*)NULL;
      registered_rmi_instance[id] = NULL;
    }

    /// Registers a thread-local send buffer with every sender.
    inline void register_send_buffer(dc_impl::thread_local_buffer* buffer) {
      for (size_t i = 0;i < senders.size(); ++i) {
        senders[i]->register_send_buffer(buffer);
      }
    }

    /// Unregisters a thread-local send buffer from every sender.
    inline void unregister_send_buffer(dc_impl::thread_local_buffer* buffer) {
      for (size_t i = 0;i < senders.size(); ++i) {
        senders[i]->unregister_send_buffer(buffer);
      }
    }
    /// \endcond

    /**
     * \brief Performs a local flush of all send buffers
     */
    void flush();

    /**
     * \brief Performs a local flush of the send buffer to one machine
     */
    void flush(procid_t p);

    /**
     * \brief Requests a flush of all send buffers to happen soon;
     */
    void flush_soon();

    /**
     * \brief Requests a flush of one send buffer to happen soon;
     */
    void flush_soon(procid_t p);

    /**
     * \brief Writes a string to the send buffer and flushes
     */
    inline void write_to_buffer(procid_t target, char* c, size_t len) {
      senders[target]->write_to_buffer(c, len);
    }

    /**
     * \brief Sends an object to a target machine and blocks until the
     * target
machine calls recv_from() to receive the object.
 *
 * This function sends a \ref sec_serializable object "t" to the target
 * machine, but waits for the target machine to call recv_from()
 * to receive the object before returning.
 *
 * Example:
 * \code
 * int i;
 * if (dc.procid() == 0) {
 *   i = 10;
 *   // if I am machine 0, I send the value i = 10 to machine 1
 *   dc.send_to(1, i);
 * } else if (dc.procid() == 1) {
 *   // machine 1 receives the value of i from machine 0
 *   dc.recv_from(0, i);
 * }
 * // at this point machines 0 and 1 have the value i = 10
 * \endcode
 *
 * \tparam U the type of object to send. This should be inferred by the
 *           compiler.
 * \param target The target machine to send to. Target machine must call
 *               recv_from() before this call will return.
 * \param t The object to send. It must be serializable. The type must
 *          match the target machine's call to recv_from()
 * \param control Optional parameter. Defaults to false. If set to true,
 *                this will be marked as control plane communication and will
 *                not register in bytes_received() or bytes_sent(). This must
 *                match the "control" parameter on the target machine's
 *                recv_from() call.
 *
 * \note Behavior is undefined if multiple threads on the same machine
 * call send_to simultaneously
 *
 */
template <typename U>
inline void send_to(procid_t target, U& t, bool control = false);

/**
 * \brief Waits to receive an object a source machine sent via send_to()
 *
 * This function waits to receive a \ref sec_serializable object "t" from a
 * source machine. The source machine must send the object using
 * send_to(). The source machine will wait for the target machine's
 * recv_from() to complete before returning.
* * Example: * \code * int i; * if (dc.procid() == 0) { * i = 10; * // if I am machine 0, I send the value i = 10 to machine 1 * dc.send_to(1, i); * } else if (dc.procid() == 1) { * // machine 1 receives the value of i from machine 0 * dc.recv_from(0, i); * } * // at this point machines 0 and 1 have the value i = 10 * \endcode * * \tparam U the type of object to receive. This should be inferred by the * compiler. * \param source The target machine to receive from. This function will block * until data is received. * \param t The object to receive. It must be serializable and the type * must match the source machine's call to send_to() * \param control Optional parameter. Defaults to false. If set to true, * this will marked as control plane communication and will * not register in bytes_received() or bytes_sent(). This must * match the "control" parameter on the source machine's * send_to() call. * * \note Behavior is undefined if multiple threads on the same machine * call recv_from simultaneously * */ template <typename U> inline void recv_from(procid_t source, U& t, bool control = false); /** * \brief This function allows one machine to broadcasts an object to all * machines. * * The originator calls broadcast with data provided in * in 'data' and originator set to true. * All other callers call with originator set to false. * * The originator will then return 'data'. All other machines * will receive the originator's transmission in the "data" parameter. * * This call is guaranteed to have barrier-like behavior. That is to say, * this call will block until all machines enter the broadcast function. 
* * Example: * \code * int i; * if (procid() == 0) { * // if I am machine 0, I broadcast the value i = 10 to all machines * i = 10; * dc.broadcast(i, true); * } else { * // all other machines receive the broadcast value * dc.broadcast(i, false); * } * // at this point, all machines have i = 10 * \endcode * * \note Behavior is undefined if more than one machine calls broadcast * with originator set to true. * * \note Behavior is undefined if multiple threads on the same machine * call broadcast simultaneously * * \param data If this is the originator, this will contain the object to * broadcast. Otherwise, this will be a reference to the object * receiving the broadcast. * \param originator Set to true if this is the source of the broadcast. * Set to false otherwise. * \param control Optional parameter. Defaults to false. If set to true, * this will marked as control plane communication and will * not register in bytes_received() or bytes_sent(). This must * be the same on all machines. */ template <typename U> inline void broadcast(U& data, bool originator, bool control = false); /** * \brief Collects information contributed by each machine onto * one machine. * * The goal is to collect some information from each machine onto a single * target machine (sendto). To accomplish this, * each machine constructs a vector of length numprocs(), and stores * the data to communicate in the procid()'th entry in the vector. * Then calling gather with the vector and the target machine will send * the contributed value to the target. * When the function returns, machine sendto will have the complete vector * where data[i] is the data contributed by machine i. 
* * Example: * \code * // construct the vector of values * std::vector<int> values; * values.resize(dc.numprocs()); * * // set my contributed value * values[dc.procid()] = dc.procid(); * dc.gather(values, 0); * // at this point machine 0 will have a vector with length equal to the * // number of processes, and containing values [0, 1, 2, ...] * // All other machines value vector will be unchanged. * \endcode * * \note Behavior is undefined machines call gather with different values for * sendto * * \note Behavior is undefined if multiple threads on the same machine * call gather simultaneously * * \param data A vector of length equal to the number of processes. The * information to communicate is in the entry data[procid()] * \param sendto Machine which will hold the complete vector at the end * of the operation. All machines must have the same value * for this parameter. * \param control Optional parameter. Defaults to false. If set to true, * this will marked as control plane communication and will * not register in bytes_received() or bytes_sent(). This must * be the same on all machines. */ template <typename U> inline void gather(std::vector<U>& data, procid_t sendto, bool control = false); /** * \brief Sends some information contributed by each machine to all machines * * The goal is to have each machine broadcast a piece of information to all * machines. This is like gather(), but all machines have the complete vector * at the end. To accomplish this, each machine constructs a vector of * length numprocs(), and stores the data to communicate in the procid()'th * entry in the vector. Then calling all_gather with the vector will result * in all machines having a complete copy of the vector containing all * contributions (entry 0 from machine 0, entry 1 from machine 1, etc). 
* * Example: * \code * // construct the vector of values * std::vector<int> values; * values.resize(dc.numprocs()); * * // set my contributed value * values[dc.procid()] = dc.procid(); * dc.all_gather(values); * // at this point all machine will have a vector with length equal to the * // number of processes, and containing values [0, 1, 2, ...] * \endcode * * \note Behavior is undefined if multiple threads on the same machine * call all_gather simultaneously * * \param data A vector of length equal to the number of processes. The * information to communicate is in the entry data[procid()] * \param control Optional parameter. Defaults to false. If set to true, * this will marked as control plane communication and will * not register in bytes_received() or bytes_sent(). This must * be the same on all machines. */ template <typename U> inline void all_gather(std::vector<U>& data, bool control = false); /** * \brief Combines a value contributed by each machine, making the result * available to all machines. * * Each machine calls all_reduce() with a object which is serializable * and has operator+= implemented. When all_reduce() returns, the "data" * variable will contain a value corresponding to adding up the objects * contributed by each machine. * * Example: * \code * int i = 1; * dc.all_reduce(i); * // since each machine contributed the value "1", * // all machines will have i = numprocs() here. * \endcode * * \param data A piece of data to perform a reduction over. * The type must implement operator+=. * \param control Optional parameter. Defaults to false. If set to true, * this will marked as control plane communication and will * not register in bytes_received() or bytes_sent(). This must * be the same on all machines. */ template <typename U> inline void all_reduce(U& data, bool control = false); /** * \brief Combines a value contributed by each machine, making the result * available to all machines. 
* * This function is equivalent to all_reduce(), but with an externally * defined PlusEqual function. * * Each machine calls all_reduce() with a object which is serializable * and a function "plusequal" which combines two instances of the object. * When all_reduce2() returns, the "data" * variable will contain a value corresponding to adding up the objects * contributed by each machine using the plusequal function. * * Where U is the type of the object, the plusequal function must be of * the form: * \code * void plusequal(U& left, const U& right); * \endcode * and must implement the equivalent of <code>left += right; </code> * * Example: * \code * void int_plus_equal(int& a, const int& b) { * a+=b; * } * * int i = 1; * dc.all_reduce2(i, int_plus_equal); * // since each machine contributed the value "1", * // all machines will have i = numprocs() here. * \endcode * * \param data A piece of data to perform a reduction over. * \param plusequal A plusequal function on the data. Must have the prototype * void plusequal(U&, const U&) * \param control Optional parameter. Defaults to false. If set to true, * this will marked as control plane communication and will * not register in bytes_received() or bytes_sent(). This must * be the same on all machines. */ template <typename U, typename PlusEqual> inline void all_reduce2(U& data, PlusEqual plusequal, bool control = false); /** \brief A distributed barrier which waits for all machines to call the barrier() function before proceeding. A machine calling the barrier() will wait until every machine reaches this barrier before continuing. Only one thread from each machine should call the barrier. 
\see full_barrier
   */
  void barrier();


 /*****************************************************************************
                      Implementation of Full Barrier
 *****************************************************************************/
  /**
   * \brief A distributed barrier which waits for all machines to call
   * the full_barrier() function before proceeding. Also waits for all
   * previously issued remote calls to complete.
   *
   * Similar to the barrier(), but provides additional guarantees that all
   * calls issued prior to this barrier are completed before returning.
   *
   * \note This function could return prematurely if other threads are
   * still issuing function calls since we cannot differentiate between
   * calls issued before the barrier and calls issued while the barrier is
   * being evaluated. Therefore, when used in a multithreaded scenario,
   * the user must ensure that all other threads which may perform
   * operations using this object are stopped before the full barrier is
   * initiated.
   *
   * \see barrier
   */
  void full_barrier();

  /**
   * \brief A wrapper on cout, that outputs only on machine 0
   */
  std::ostream& cout() const {
    if (procid() == 0) return std::cout;
    else return nullstrm;
  }

  /**
   * \brief A wrapper on cerr, that outputs only on machine 0
   */
  std::ostream& cerr() const {
    if (procid() == 0) return std::cerr;
    else return nullstrm;
  }

 private:
  mutex full_barrier_lock;
  fiber_conditional full_barrier_cond;
  // per-source-machine number of calls that must be received before the
  // full barrier may release
  std::vector<size_t> calls_to_receive;
  // used to inform the counter that the full barrier
  // is in effect and all modifications to the calls_recv
  // counter will need to lock and signal
  volatile bool full_barrier_in_effect;

  /** number of 'source' processor counts which have
      not achieved the right recv count */
  atomic<size_t> num_proc_recvs_incomplete;

  /// Marked as 1 if the proc is complete
  dense_bitset procs_complete;

  /// \internal  null stream returned by cout()/cerr() when procid() != 0
  mutable boost::iostreams::stream<boost::iostreams::null_sink> nullstrm;

 /*****************************************************************************
                          Collection of Statistics
 *****************************************************************************/
 private:
  // Snapshot of the RPC counters of one machine. Serializable so that each
  // machine can ship its statistics to machine 0 in gather_statistics().
  struct collected_statistics {
    size_t callssent;          // number of RPC calls issued by this machine
    size_t bytessent;          // payload bytes sent
    size_t network_bytessent;  // bytes placed on the network
    collected_statistics(): callssent(0), bytessent(0), network_bytessent(0) { }
    void save(oarchive &oarc) const {
      oarc << callssent << bytessent << network_bytessent;
    }
    void load(iarchive &iarc) {
      iarc >> callssent >> bytessent >> network_bytessent;
    }
  };

 public:
  /** Gather RPC statistics. All machines must call
      this function at the same time. However, only proc 0 will
      return values */
  std::map<std::string, size_t> gather_statistics();
};

} // namespace graphlab

#define REGISTER_RPC(dc, f) dc.register_rpc<typeof(f)*, f>(std::string(BOOST_PP_STRINGIZE(f)))

#include <graphlab/rpc/function_arg_types_undef.hpp>
#include <graphlab/rpc/function_call_dispatch.hpp>
#include <graphlab/rpc/request_dispatch.hpp>
#include <graphlab/rpc/dc_dist_object.hpp>
#include <graphlab/rpc/dc_services.hpp>

namespace graphlab {

// The collective operations below are thin forwarders: the actual
// implementations live in the dc_services object held in
// distributed_services.

template <typename U>
inline void distributed_control::send_to(procid_t target, U& t, bool control) {
  distributed_services->send_to(target, t, control);
}

template <typename U>
inline void distributed_control::recv_from(procid_t source, U& t, bool control) {
  distributed_services->recv_from(source, t, control);
}

template <typename U>
inline void distributed_control::broadcast(U& data, bool originator, bool control) {
  distributed_services->broadcast(data, originator, control);
}

template <typename U>
inline void distributed_control::gather(std::vector<U>& data, procid_t sendto, bool control) {
  distributed_services->gather(data, sendto, control);
}

template <typename U>
inline void distributed_control::all_gather(std::vector<U>& data, bool control) {
  distributed_services->all_gather(data, control);
}

template <typename U>
inline void distributed_control::all_reduce(U& data, bool control) {
  distributed_services->all_reduce(data, control);
}

template <typename U, typename PlusEqual>
inline void
distributed_control::all_reduce2(U& data, PlusEqual plusequal, bool control) { distributed_services->all_reduce2(data, plusequal, control); } } #include <graphlab/util/mpi_tools.hpp> #endif ================================================ FILE: src/graphlab/rpc/dc_buffered_stream_send2.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <iostream> #include <boost/iostreams/stream.hpp> #include <graphlab/rpc/dc.hpp> #include <graphlab/rpc/dc_buffered_stream_send2.hpp> #include <graphlab/util/branch_hints.hpp> namespace graphlab { namespace dc_impl { void dc_buffered_stream_send2::flush() { comm->trigger_send_timeout(target, true); } void dc_buffered_stream_send2::flush_soon() { comm->trigger_send_timeout(target, false); } inline size_t dc_buffered_stream_send2::bytes_sent() { size_t ret = total_bytes_sent; lock.lock(); for (size_t i = 0;i < send_buffers.size(); ++i) { ret += send_buffers[i]->get_bytes_sent(target); } lock.unlock(); return ret; } void dc_buffered_stream_send2::write_to_buffer(char* c, size_t len) { lock.lock(); additional_flush_buffers.push_back(std::make_pair(c, len)); lock.unlock(); } void dc_buffered_stream_send2::register_send_buffer(thread_local_buffer* buffer) { lock.lock(); send_buffers.push_back(buffer); to_send.resize(send_buffers.size()); 
lock.unlock(); } void dc_buffered_stream_send2::unregister_send_buffer(thread_local_buffer* buffer) { lock.lock(); for (size_t i = 0;i < send_buffers.size(); ++i) { if (send_buffers[i] == buffer) { total_bytes_sent.inc(send_buffers[i]->get_bytes_sent(target)); send_buffers.erase(send_buffers.begin() + i); break; } } to_send.resize(send_buffers.size()); lock.unlock(); } dc_buffered_stream_send2::~dc_buffered_stream_send2() { // unregister all the buffers. std::vector<thread_local_buffer*> all_buffers; for (size_t i = 0; i < all_buffers.size(); ++i) { unregister_send_buffer(all_buffers[i]); } } size_t dc_buffered_stream_send2::get_outgoing_data(circular_iovec_buffer& outdata) { lock.lock(); size_t sendlen = 0; for (size_t i = 0;i < send_buffers.size(); ++i) { std::pair<buffer_elem*, buffer_elem*> bufs = send_buffers[i]->extract(target); if (bufs.first != NULL) { while(bufs.first != bufs.second) { buffer_elem* prev = bufs.first; iovec sendvec; sendvec.iov_base = bufs.first->buf; sendvec.iov_len = bufs.first->len; sendlen += sendvec.iov_len; outdata.write(sendvec); buffer_elem** next = &bufs.first->next; volatile buffer_elem** n = (volatile buffer_elem**)(next); while(__unlikely__((*n) == NULL)) { asm volatile("pause\n": : :"memory"); } bufs.first = (buffer_elem*)(*n); delete prev; } } } for (size_t i = 0;i < additional_flush_buffers.size(); ++i) { iovec sendvec; sendvec.iov_base = additional_flush_buffers[i].first; sendvec.iov_len = additional_flush_buffers[i].second; sendlen += sendvec.iov_len; outdata.write(sendvec); } lock.unlock(); return sendlen; } } // namespace dc_impl } // namespace graphlab ================================================ FILE: src/graphlab/rpc/dc_buffered_stream_send2.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef DC_BUFFERED_STREAM_SEND2_HPP #define DC_BUFFERED_STREAM_SEND2_HPP #include <iostream> #include <boost/function.hpp> #include <boost/bind.hpp> #include <boost/type_traits/is_base_of.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/thread_local_send_buffer.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/dc_comm_base.hpp> #include <graphlab/rpc/dc_send.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/util/inplace_lf_queue.hpp> #include <graphlab/logger/logger.hpp> namespace graphlab { class distributed_control; namespace dc_impl { /** * \internal \ingroup rpc Sender for the dc class. The job of the sender is to take as input data blocks of pieces which should be sent to a single destination socket. This can be thought of as a sending end of a multiplexor. This class performs buffered transmissions using an blocking queue with one call per queue entry. A seperate thread is used to transmit queue entries. Rudimentary write combining is used to decrease transmission overhead. This is typically the best performing sender. This can be enabled by passing "buffered_queued_send=yes" in the distributed control initstring. dc_buffered_stream_send22 is similar, but does not perform write combining. 
*/
class dc_buffered_stream_send2: public dc_send{
 public:
  /// Constructs a sender which transmits to machine "target" through "comm".
  dc_buffered_stream_send2(distributed_control* dc, dc_comm_base *comm,
                           procid_t target):
      dc(dc), comm(comm), target(target) { }

  ~dc_buffered_stream_send2();

  /// Adds a thread-local buffer to the set drained by get_outgoing_data().
  void register_send_buffer(thread_local_buffer* buffer);

  /// Removes a previously registered thread-local buffer, folding its byte
  /// count into total_bytes_sent.
  void unregister_send_buffer(thread_local_buffer* buffer);

  /// Drains all pending buffers into outdata; returns the number of bytes
  /// written.
  size_t get_outgoing_data(circular_iovec_buffer& outdata);

  /// Number of payload bytes destined for the target so far.
  inline size_t bytes_sent();

  /// Queues a raw (pointer, length) block for the next get_outgoing_data().
  void write_to_buffer(char* c, size_t len);

  /// Requests an immediate (urgent) transmission to the target.
  void flush();

  /// Requests a transmission to the target, without urgency.
  void flush_soon();

 private:
  /// pointer to the owner
  distributed_control* dc;
  /// comm layer used to trigger transmissions
  dc_comm_base *comm;
  /// the machine this sender transmits to
  procid_t target;
  /// bytes accounted for by buffers that have since been unregistered
  atomic<size_t> total_bytes_sent;
  /// the currently registered thread-local send buffers
  std::vector<thread_local_buffer*> send_buffers;

  // temporary array matched to the same length as send_buffers
  // to avoid repeated reallocation of this array when
  // get_outgoing_data is called
  std::vector<std::vector<std::pair<char*, size_t> > > to_send;
  /// raw blocks queued via write_to_buffer()
  std::vector<std::pair<char*, size_t> > additional_flush_buffers;
  /// protects send_buffers, to_send and additional_flush_buffers
  mutex lock;
};

} // namespace dc_impl
} // namespace graphlab

#endif // DC_BUFFERED_STREAM_SEND_EXPQUEUE_HPP


================================================
FILE: src/graphlab/rpc/dc_comm_base.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef DC_COMM_BASE_HPP
#define DC_COMM_BASE_HPP

#include <sys/socket.h>
#include <vector>
#include <string>
#include <map>

#include <graphlab/rpc/dc_types.hpp>
#include <graphlab/rpc/dc_internal_types.hpp>
#include <graphlab/rpc/dc_receive.hpp>
#include <graphlab/rpc/dc_send.hpp>

namespace graphlab {
namespace dc_impl {

/**
 * \ingroup rpc
 * \internal
 * The base class of all comms implementations
 */
class dc_comm_base {
 public:
  inline dc_comm_base() { };

  /// Reports the capabilities of this comm implementation.
  virtual size_t capabilities() const = 0;

  /**
   Parses initialization parameters. Most of these parameters are
   user provided, or provided on a higher level initialization system.
   It is entirely up to the comm implementation how these parameters
   are to be treated. The descriptions here are largely prescriptive.
   All machines are called with the same initialization parameters
   (of course with the exception of curmachineid)

   The expected behavior is that this function should pause until all
   communication has been set up and returns the number of systems
   in the network. After which, all other remaining public functions
   (numprocs(), send(), etc) should operate normally. Every received
   message should immediately trigger the attached receiver.

   machines: a vector of strings of machine IDs. This is typically
             provided by the user or through some other initialization
             mechanism
   initstring: Additional parameters passed by the user
   curmachineid: The ID of the current machine. Will be size_t(-1) if
                 this is not available. (Some comm protocols will
                 negotiate this itself.)
   receiver: the receiving object
  */
  virtual void init(const std::vector<std::string> &machines,
                    const std::map<std::string,std::string> &initopts,
                    procid_t curmachineid,
                    std::vector<dc_receive*> receiver,
                    std::vector<dc_send*> sender) = 0;

  /// Must close all connections when this function is called
  virtual void close() = 0;

  /// Requests transmission of data queued for "target". If "urgent" is
  /// set, the data should be sent as soon as possible (used by the
  /// senders' flush() / flush_soon()).
  virtual void trigger_send_timeout(procid_t target, bool urgent) = 0;

  virtual ~dc_comm_base() {}

  /// Number of machines in the network.
  virtual procid_t numprocs() const = 0;

  /// ID of the current machine.
  virtual procid_t procid() const = 0;

  /// Total number of bytes sent over the network.
  virtual size_t network_bytes_sent() const = 0;

  /// Total number of bytes received from the network.
  virtual size_t network_bytes_received() const = 0;

  /// Current length of the send queue.
  virtual size_t send_queue_length() const = 0;

};

} // namespace dc_impl
} // namespace graphlab

#endif


================================================
FILE: src/graphlab/rpc/dc_compile_parameters.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_DC_COMPILE_PARAMETERS_HPP #define GRAPHLAB_DC_COMPILE_PARAMETERS_HPP // do not change /** \ingroup rpc \def RPC_DEFAULT_COMMTYPE \brief default communication method */ #define RPC_DEFAULT_COMMTYPE TCP_COMM /** \ingroup rpc \def RPC_MAX_N_PROCS \brief Maximum number of processes supported */ #define RPC_MAX_N_PROCS 128 /** * \ingroup RPC * \def RECEIVE_BUFFER_SIZE * The size of the receive buffer for each socket */ #define RECEIVE_BUFFER_SIZE 131072 /**************************************************************************/ /* */ /* Send Buffer Behavior Control */ /* */ /**************************************************************************/ /* * The architecture of the sending subsystem is that there is 1 main send thread. * Which polls a collection of thread local queues. * * Each thread local send queues comprises of 1 queue for each target machine. * Each queue comprises of 2 parts: * - An array of "full" buffers * - One not-full buffer. */ /** * \ingroup RPC * \def SEND_POLL_TIMEOUT * The TCP sender polls the queues every so often to ensure * progress; This is the timeout value for the number of microseconds * between each poll. */ #define SEND_POLL_TIMEOUT 10000 /** * \ingroup rpc * \def INITIAL_BUFFER_SIZE * Each buffer is allocated to this size at the start */ #define INITIAL_BUFFER_SIZE 65536 /** * \ingroup rpc * \def FULL_BUFFER_SIZE_LIMIT * Once the buffer contents exceeds this, it becomes a full buffer. */ #define FULL_BUFFER_SIZE_LIMIT 63000 /** * \ingroup RPC * \def NUM_FULL_BUFFER_LIMIT * Number of full buffers in the send queue before a flush is explicitly called. 
 */
#define NUM_FULL_BUFFER_LIMIT 32

/**************************************************************************/
/*                                                                        */
/*                          RPC Handling Control                          */
/*                                                                        */
/**************************************************************************/

/**
 \ingroup rpc
 \def RPC_DEFAULT_NUMHANDLERTHREADS
 \brief default number of handler threads to spawn.
 */
#define RPC_DEFAULT_NUMHANDLERTHREADS (size_t)(-1)

/**
 * \ingroup RPC
 * \def RPC_DO_NOT_BREAK_BLOCKS
 *
 * If this option is turned on,
 * collections of messages received in a buffer
 * will all be executed by the same thread.
 * This decreases latency and increases throughput
 * but at a cost of parallelism.
 * Also, if turned on together with RPC_BLOCK_STRIPING,
 * the sequentialization key is ignored.
 */
#define RPC_DO_NOT_BREAK_BLOCKS

/**
 * \ingroup RPC
 * \def RPC_BLOCK_STRIPING
 * Incoming buffers are striped across threads
 * to be processed. If this is turned on together with
 * RPC_DO_NOT_BREAK_BLOCKS, the sequentialization key is
 * ignored.
 */
#define RPC_BLOCK_STRIPING

/**************************************************************************/
/*                                                                        */
/*                              Miscellaneous                             */
/*                                                                        */
/**************************************************************************/

/**
 * \ingroup RPC
 * \def DEFAULT_BUFFERED_EXCHANGE_SIZE
 * maximum size of each buffer in the buffer exchange. Beyond this size,
 * a send is performed.
 */
#define DEFAULT_BUFFERED_EXCHANGE_SIZE FULL_BUFFER_SIZE_LIMIT

#endif


================================================
FILE: src/graphlab/rpc/dc_dist_object.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <graphlab/rpc/dc.hpp> #ifndef GRAPHLAB_DC_DIST_OBJECT_HPP #define GRAPHLAB_DC_DIST_OBJECT_HPP #include <vector> #include <string> #include <set> #include <graphlab/parallel/atomic.hpp> #include <graphlab/parallel/fiber_conditional.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/dc_dist_object_base.hpp> #include <graphlab/rpc/object_request_issue.hpp> #include <graphlab/rpc/object_call_issue.hpp> #include <graphlab/rpc/object_broadcast_issue.hpp> #include <graphlab/rpc/function_ret_type.hpp> #include <graphlab/rpc/mem_function_arg_types_def.hpp> #include <graphlab/util/charstream.hpp> #include <boost/preprocessor.hpp> #include <graphlab/util/tracepoint.hpp> #include <graphlab/rpc/request_reply_handler.hpp> #include <graphlab/macros_def.hpp> #define BARRIER_BRANCH_FACTOR 128 namespace graphlab { /** \ingroup rpc \brief Provides a class with its own distributed communication context, allowing instances of the class to communicate with other remote instances. The philosophy behind the dc_dist_object is the concept of "distributed objects". The idea is that the user should be able to write code: \code void main() { // ... initialization of a distributed_control object dc ... 
      distributed_vector vec(dc), vec2(dc);
      distributed_graph g(dc);
   }
   \endcode
   where if run in a distributed setting, the "vec" variable can behave as
   if it is a single distributed object, and automatically coordinate its
   operations across the network; communicating with the other instances
   of "vec" on the other machines. Essentially, each object (vec, vec2
   and g) constructs its own private communication context, which allows
   every machine's "vec" variable to communicate only with other machines'
   "vec" variables. And similarly for "vec2" and "g".

   This private communication context is provided by this dc_dist_object
   class. To construct a distributed object requires little work:

   \code
   class distributed_int_vector {
     private:
      // creates a local dc_dist_object context
      graphlab::dc_dist_object<distributed_int_vector> rmi;

     public:
      // context must be initialized on construction with the
      // root distributed_control object
      distributed_int_vector(distributed_control& dc): rmi(dc, this) {
        ... other initialization ...
        // make sure all machines finish constructing this object
        // before continuing
        rmi.barrier();
      }
   };
   \endcode

   After which remote_call() and remote_request() can be used to communicate
   across the network with the same matching instance of the
   distributed_int_vector.

   Each dc_dist_object maintains its own private communication context which
   is not influenced by other communication contexts. In other words, the
   <code>rmi.barrier()</code> and all other operations in each instance of
   the distributed_int_vector are independent of each other. In particular,
   the <code>rmi.full_barrier()</code> only waits for completion of all RPC
   calls from within the current communication context.

   See the examples in \ref RPC for more usage examples.

   \note There is no real limit to the number of distributed objects that
   can be created. However, each dc_dist_object does contain a reasonably
   large amount of state, so frequent construction and deletion of objects
   is not recommended.
*/ template <typename T> class dc_dist_object : public dc_impl::dc_dist_object_base{ private: distributed_control &dc_; size_t obj_id; size_t control_obj_id; // object id of this object T* owner; std::vector<atomic<size_t> > callsreceived; std::vector<atomic<size_t> > callssent; std::vector<atomic<size_t> > bytessent; // make operator= private dc_dist_object<T>& operator=(const dc_dist_object<T> &d) {return *this;} friend class distributed_control; DECLARE_TRACER(distobj_remote_call_time); public: /// \cond GRAPHLAB_INTERNAL /// Should not be used by the user void inc_calls_received(procid_t p) { if (!full_barrier_in_effect) { size_t t = callsreceived[p].inc(); if (full_barrier_in_effect) { if (t == calls_to_receive[p]) { // if it was me who set the bit if (procs_complete.set_bit(p) == false) { // then decrement the incomplete count. // if it was me to decreased it to 0 // lock and signal full_barrier_lock.lock(); if (num_proc_recvs_incomplete.dec() == 0) { full_barrier_cond.signal(); } full_barrier_lock.unlock(); } } } } else { //check the proc I just incremented. // If I just exceeded the required size, I need // to decrement the full barrier counter if (callsreceived[p].inc() == calls_to_receive[p]) { // if it was me who set the bit if (procs_complete.set_bit(p) == false) { // then decrement the incomplete count. // if it was me to decreased it to 0 // lock and signal full_barrier_lock.lock(); if (num_proc_recvs_incomplete.dec() == 0) { full_barrier_cond.signal(); } full_barrier_lock.unlock(); } } } } /// Should not be used by the user void inc_calls_sent(procid_t p) { callssent[p].inc(); } /// Should not be used by the user void inc_bytes_sent(procid_t p, size_t bytes) { bytessent[p].inc(bytes); } /// Should not be used by the user size_t get_obj_id() const { return obj_id; } /// \endcond GRAPHLAB_INTERNAL public: /** * \brief Constructs a distributed object context. 
* * The constructor constructs a distributed object context which is * associated with the "owner" object. * * \param dc_ The root distributed_control which provides the * communication control plane. * \param owner The object to associate with */ dc_dist_object(distributed_control &dc_, T* owner): dc_(dc_),owner(owner) { callssent.resize(dc_.numprocs()); callsreceived.resize(dc_.numprocs()); bytessent.resize(dc_.numprocs()); //------ Initialize the matched send/recv ------ recv_froms.resize(dc_.numprocs()); //------ Initialize the gatherer ------ gather_receive.resize(dc_.numprocs()); //------- Initialize the Barrier ---------- child_barrier_counter.value = 0; barrier_sense = 1; barrier_release = -1; // compute my children childbase = size_t(dc_.procid()) * BARRIER_BRANCH_FACTOR + 1; if (childbase >= dc_.numprocs()) { numchild = 0; } else { size_t maxchild = std::min<size_t>(dc_.numprocs(), childbase + BARRIER_BRANCH_FACTOR); numchild = (procid_t)(maxchild - childbase); } parent = (procid_t)((dc_.procid() - 1) / BARRIER_BRANCH_FACTOR) ; //-------- Initialize all gather -------------- ab_child_barrier_counter.value = 0; ab_barrier_sense = 1; ab_barrier_release = -1; //-------- Initialize the full barrier --------- full_barrier_in_effect = false; procs_complete.resize(dc_.numprocs()); // register obj_id = dc_.register_object(owner, this); control_obj_id = dc_.register_object(this, this); //-------- Initialize Tracer std::string name = typeid(T).name(); INITIALIZE_TRACER(distobj_remote_call_time, std::string("dc_dist_object ") + name + ": remote_call time"); } /// \brief The number of function calls received by this object size_t calls_received() const { size_t ctr = 0; for (size_t i = 0;i < numprocs(); ++i) { ctr += callsreceived[i].value; } return ctr; } /// \brief The number of function calls sent from this object size_t calls_sent() const { size_t ctr = 0; for (size_t i = 0;i < numprocs(); ++i) { ctr += callssent[i].value; } return ctr; } /** \brief The number of 
bytes sent from this object, excluding * headers and other control overhead. */ size_t bytes_sent() const { size_t ctr = 0; for (size_t i = 0;i < numprocs(); ++i) { ctr += bytessent[i].value; } return ctr; } /// \brief A reference to the underlying distributed_control object distributed_control& dc() { return dc_; } /// \brief A const reference to the underlying distributed_control object const distributed_control& dc() const { return dc_; } /// \brief The current process ID inline procid_t procid() const { return dc_.procid(); } /// \brief The number of processes in the distributed program. inline procid_t numprocs() const { return dc_.numprocs(); } /** * \brief A wrapper on cout, that outputs only on machine 0 */ std::ostream& cout() const { return dc_.cout(); } /** * \brief A wrapper on cerr, that outputs only on machine 0 */ std::ostream& cerr() const { return dc_.cout(); } /// \cond GRAPHLAB_INTERNAL /* This generates the interface functions for the standard calls, basic calls The function looks like this: \code template<typename F , typename T0> void remote_call (procid_t target, F remote_function , T0 i0 ) { ASSERT_LT(target, dc_.senders.size()); if ((STANDARD_CALL & CONTROL_PACKET) == 0) inc_calls_sent(target); dc_impl::object_call_issue1 <T, F , T0> ::exec(dc_.senders[target], STANDARD_CALL, target,obj_id, remote_function , i0 ); } The argument to the RPC_INTERFACE_GENERATOR are: - the name of the rpc call ("remote_call" in the first one) - the name of the issueing processor ("object_call_issue") - The flags to set on the call ("STANDARD_CALL") The call can be issued with rmi.remote_call(target, &object_type::function_name, arg1, arg2...) 
\endcode
*/

// Helper macros used by Boost.PP when expanding the generated RPC
// interfaces below:
//   GENARGS expands to a typed parameter        ("T0 i0")
//   GENI    expands to an argument name         ("i0")
//   GENT    expands to a type name              ("T0")
//   GENARC  serializes one argument into 'arc'
#define GENARGS(Z,N,_)  BOOST_PP_CAT(T, N) BOOST_PP_CAT(i, N)
#define GENI(Z,N,_) BOOST_PP_CAT(i, N)
#define GENT(Z,N,_) BOOST_PP_CAT(T, N)
#define GENARC(Z,N,_) arc << BOOST_PP_CAT(i, N);

// Expands to one non-blocking call interface taking N user arguments.
// FNAME_AND_CALL is a 3-tuple: (interface name, issue class, packet flags).
// Control packets are excluded from the per-target call statistics.
#define RPC_INTERFACE_GENERATOR(Z,N,FNAME_AND_CALL) \
  template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
  void  BOOST_PP_TUPLE_ELEM(3,0,FNAME_AND_CALL) (procid_t target, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
    ASSERT_LT(target, dc_.senders.size()); \
    BEGIN_TRACEPOINT(distobj_remote_call_time); \
    if ((BOOST_PP_TUPLE_ELEM(3,2,FNAME_AND_CALL) & CONTROL_PACKET) == 0) inc_calls_sent(target); \
    BOOST_PP_CAT( BOOST_PP_TUPLE_ELEM(3,1,FNAME_AND_CALL),N) \
        <T, F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T)> \
        ::exec(this, dc_.senders[target],  BOOST_PP_TUPLE_ELEM(3,2,FNAME_AND_CALL), target,obj_id, remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
    END_TRACEPOINT(distobj_remote_call_time); \
  } \

/* Generates the interface functions. 3rd argument is a tuple (interface name, issue name, flags) */
BOOST_PP_REPEAT(7, RPC_INTERFACE_GENERATOR, (remote_call, dc_impl::object_call_issue, STANDARD_CALL) )
BOOST_PP_REPEAT(7, RPC_INTERFACE_GENERATOR, (control_call,dc_impl::object_call_issue, (STANDARD_CALL | CONTROL_PACKET)) )

/**
 * This generates a "split call". Where the header of the call message
 * is written to with split_call_begin, and the message actually sent with
 * split_call_end(). It is then up to the user to serialize the message arguments
 * into the oarchive returned. The split call can provide performance gains
 * when the contents of the message are large, since this allows the user to
 * control the serialization process.
 *
 * Example:
 * \code
 * struct mystruct {
 *   void function_to_call(size_t len, wild_pointer w) {
 *      // w will contain all the serialized contents of ..stuff...
 *   }
 *
 *   void stuff() {
 *     oarchive* oarc = rmi.split_call_begin(&mystruct::function_to_call);
 *     (*oarc) << ... stuff...
 *     rmi.split_call_end(1, // to machine 1
 *                        oarc);
 *
 *   }
 * }
 * \endcode
 */
oarchive* split_call_begin(void (T::*remote_function)(size_t, wild_pointer)) {
  // Allocates the message buffer and writes the RPC header; the caller
  // then serializes the payload into the returned archive.
  return dc_impl::object_split_call<T, void(T::*)(size_t, wild_pointer)>::split_call_begin(this, obj_id, remote_function);
}

/**
 * Sends a split call started by \ref split_call_begin
 * See \ref split_call_begin for details.
 */
void split_call_end(procid_t target, oarchive* oarc) {
  // split calls bypass the regular generated interfaces, so the
  // statistics counter is updated explicitly here
  inc_calls_sent(target);
  return dc_impl::object_split_call<T, void(T::*)(size_t, wild_pointer)>::split_call_end(this, oarc, dc_.senders[target], target, STANDARD_CALL);
}

/**
 * Cancels a split call begun with split_call_begin, releasing the
 * archive it allocated.
 */
void split_call_cancel(oarchive* oarc) {
  return dc_impl::object_split_call<T, void(T::*)(size_t, wild_pointer)>::split_call_cancel(oarc);
}

// Expands to one broadcast-call interface taking N user arguments: issues
// the same message to every target in [target_begin, target_end) while
// serializing the arguments only once.
#define BROADCAST_INTERFACE_GENERATOR(Z,N,FNAME_AND_CALL) \
  template<typename Iterator, typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
  void  BOOST_PP_TUPLE_ELEM(3,0,FNAME_AND_CALL) (Iterator target_begin, Iterator target_end, \
                                                 F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
    if (target_begin == target_end) return; \
    BEGIN_TRACEPOINT(distobj_remote_call_time); \
    if ((BOOST_PP_TUPLE_ELEM(3,2,FNAME_AND_CALL) & CONTROL_PACKET) == 0) { \
      Iterator iter = target_begin; \
      while (iter != target_end){ \
        inc_calls_sent(*iter); \
        ++iter; \
      } \
    } \
    BOOST_PP_CAT( BOOST_PP_TUPLE_ELEM(3,1,FNAME_AND_CALL),N) \
        <Iterator, T, F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T)> \
        ::exec(this, dc_.senders,  BOOST_PP_TUPLE_ELEM(3,2,FNAME_AND_CALL), target_begin, target_end,obj_id, remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
    END_TRACEPOINT(distobj_remote_call_time); \
  }

BOOST_PP_REPEAT(7, BROADCAST_INTERFACE_GENERATOR, (remote_call, dc_impl::object_broadcast_issue, STANDARD_CALL) )

/* The generation procedure for requests is the same.
The only difference is that the function name has to be changed a little
to identify the return type of the function,
(typename dc_impl::function_ret_type<__GLRPC_FRESULT>)
and the issuing processor is object_request_issue.

The call can be issued with
\code
ret = rmi.remote_request(target, &object_type::function_name, arg1, arg2...)
\endcode
*/

// Expands to the low-level request interface taking N user arguments.
// The caller supplies the reply handle and the packet flags explicitly;
// this is the common backend used by the two wrappers generated below.
#define CUSTOM_REQUEST_INTERFACE_GENERATOR(Z,N,ARGS) \
  template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
  BOOST_PP_TUPLE_ELEM(2,0,ARGS) (procid_t target, size_t handle, unsigned char flags, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
    ASSERT_LT(target, dc_.senders.size()); \
    if ((flags & CONTROL_PACKET) == 0) inc_calls_sent(target); \
    BOOST_PP_CAT( BOOST_PP_TUPLE_ELEM(2,1,ARGS),N) \
        <T, F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T)> \
        ::exec(this, dc_.senders[target], handle, flags, target,obj_id, remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
  }

// Expands to the non-blocking request interface: issues the request and
// returns a request_future immediately, without waiting for the reply.
#define FUTURE_REQUEST_INTERFACE_GENERATOR(Z,N,ARGS) \
  template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
  BOOST_PP_TUPLE_ELEM(2,0,ARGS) (procid_t target, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
    ASSERT_LT(target, dc_.senders.size()); \
    request_future<__GLRPC_FRESULT> reply; \
    custom_remote_request(target, reply.get_handle(), BOOST_PP_TUPLE_ELEM(2,1,ARGS), remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
    return reply; \
  }

// Expands to the blocking request interface: issues the request and
// immediately waits on the future, returning the reply value.
#define REQUEST_INTERFACE_GENERATOR(Z,N,ARGS) \
  template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
  BOOST_PP_TUPLE_ELEM(2,0,ARGS) (procid_t target, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
    ASSERT_LT(target, dc_.senders.size()); \
    request_future<__GLRPC_FRESULT> reply; \
    custom_remote_request(target, reply.get_handle(),BOOST_PP_TUPLE_ELEM(2,1,ARGS), remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
    return reply(); \
  }

/* Generates the interface functions. 3rd argument is a tuple (interface name, issue name, flags) */
BOOST_PP_REPEAT(6, CUSTOM_REQUEST_INTERFACE_GENERATOR, (void custom_remote_request, dc_impl::object_request_issue) )
BOOST_PP_REPEAT(6, REQUEST_INTERFACE_GENERATOR, (typename dc_impl::function_ret_type<__GLRPC_FRESULT>::type remote_request, (STANDARD_CALL | FLUSH_PACKET)) )
BOOST_PP_REPEAT(6, FUTURE_REQUEST_INTERFACE_GENERATOR, (request_future<__GLRPC_FRESULT> future_remote_request, (STANDARD_CALL)) )

#undef RPC_INTERFACE_GENERATOR
#undef BROADCAST_INTERFACE_GENERATOR
#undef REQUEST_INTERFACE_GENERATOR
#undef CUSTOM_REQUEST_INTERFACE_GENERATOR
#undef FUTURE_REQUEST_INTERFACE_GENERATOR

/*
Now generate the interface functions which allow me to call this
dc_dist_object directly. The internal calls are similar to the ones above.
The only difference is that instead of 'obj_id', the parameter passed
to the issue processor is "control_obj_id" which identifies the current
RMI class.
*/

// Same as the basic call generator above, but addressed to this
// dc_dist_object itself (control_obj_id) rather than to the owner object.
#define RPC_INTERFACE_GENERATOR(Z,N,FNAME_AND_CALL) \
  template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
  void  BOOST_PP_TUPLE_ELEM(3,0,FNAME_AND_CALL) (procid_t target, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
    ASSERT_LT(target, dc_.senders.size()); \
    if ((BOOST_PP_TUPLE_ELEM(3,2,FNAME_AND_CALL) & CONTROL_PACKET) == 0) inc_calls_sent(target); \
    BOOST_PP_CAT( BOOST_PP_TUPLE_ELEM(3,1,FNAME_AND_CALL),N) \
        <dc_dist_object<T>, F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T)> \
        ::exec(this, dc_.senders[target],  BOOST_PP_TUPLE_ELEM(3,2,FNAME_AND_CALL), target,control_obj_id, remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
  } \

BOOST_PP_REPEAT(6, RPC_INTERFACE_GENERATOR, (internal_call,dc_impl::object_call_issue, STANDARD_CALL) )
BOOST_PP_REPEAT(6, RPC_INTERFACE_GENERATOR, (internal_control_call,dc_impl::object_call_issue, (STANDARD_CALL | CONTROL_PACKET)) )

// Same as the request generator above, but addressed to this
// dc_dist_object itself (control_obj_id); used by the collective
// operations (broadcast/gather/barrier) implemented below.
#define REQUEST_INTERFACE_GENERATOR(Z,N,ARGS) \
  template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
  BOOST_PP_TUPLE_ELEM(3,0,ARGS) (procid_t target, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
    ASSERT_LT(target, dc_.senders.size()); \
    request_future<__GLRPC_FRESULT> reply; \
    if ((BOOST_PP_TUPLE_ELEM(3,2,ARGS) & CONTROL_PACKET) == 0) inc_calls_sent(target); \
    BOOST_PP_CAT( BOOST_PP_TUPLE_ELEM(3,1,ARGS),N) \
        <dc_dist_object<T>, F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T)> \
        ::exec(this, dc_.senders[target], reply.get_handle(),  BOOST_PP_TUPLE_ELEM(3,2,ARGS), target,control_obj_id, remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENI ,_) ); \
    return reply(); \
  } \

/* Generates the interface functions. 3rd argument is a tuple (interface name, issue name, flags) */
BOOST_PP_REPEAT(6, REQUEST_INTERFACE_GENERATOR, (typename dc_impl::function_ret_type<__GLRPC_FRESULT>::type internal_request, dc_impl::object_request_issue, (STANDARD_CALL)) )
BOOST_PP_REPEAT(6, REQUEST_INTERFACE_GENERATOR, (typename dc_impl::function_ret_type<__GLRPC_FRESULT>::type internal_control_request, dc_impl::object_request_issue, (STANDARD_CALL | CONTROL_PACKET)) )

#undef RPC_INTERFACE_GENERATOR
#undef REQUEST_INTERFACE_GENERATOR
#undef GENARC
#undef GENT
#undef GENI
#undef GENARGS

/// \endcond

#if DOXYGEN_DOCUMENTATION

/**
 * \brief Performs a non-blocking RPC call to the target machine
 * to run the provided function pointer.
 *
 * remote_call() calls the function "fn" on a target remote machine.
 * "fn" may be public, private or protected within the owner class; there are
 * no access restrictions. Provided arguments are serialized and sent to the
 * target. Therefore, all arguments are necessarily transmitted by value.
 * If the target function has a return value, the return value is lost.
 *
 * remote_call() is non-blocking and does not wait for the target machine
 * to complete execution of the function.
Different remote_calls may be handled * by different threads on the target machine and thus the target function * should be made thread-safe. * Alternatively, see distributed_control::set_sequentialization_key() * to force sequentialization of groups of remote calls. * * If blocking operation is desired, remote_request() may be used. * Alternatively, a full_barrier() may also be used to wait for completion of * all incomplete RPC calls. * * Example: * \code * // A print function is defined in the distributed object * class distributed_obj_example { * graphlab::dc_dist_object<distributed_obj_example> rmi; * ... initialization and constructor ... * private: * void print(std::string s) { * std::cout << s << "\n"; * } * public: * void print_on_machine_one(std::string s) { * // calls the print function on machine 1 with the argument "s" * rmi.remote_call(1, &distributed_obj_example::print, s); * } * } * \endcode * * Note the syntax for obtaining a pointer to a member function. * * \param targetmachine The ID of the machine to run the function on * \param fn The function to run on the target machine. Must be a pointer to * member function in the owning object. * \param ... The arguments to send to Fn. Arguments must be serializable. * and must be castable to the target types. */ void remote_call(procid_t targetmachine, Fn fn, ...); /** * \brief Performs a non-blocking RPC call to a collection of machines * to run the provided function pointer. * * This function calls the provided function pointer on a collection of * machines contained in the iterator range [begin, end). * Provided arguments are serialized and sent to the target. * Therefore, all arguments are necessarily transmitted by value. * If the target function has a return value, the return value is lost. 
* * This function is functionally equivalent to: * * \code * while(machine_begin != machine_end) { * remote_call(*machine_begin, fn, ...); * ++machine_begin; * } * \endcode * * However, this function makes some optimizations to ensure all arguments * are only serialized once instead of \#calls times. * * This function is non-blocking and does not wait for the target machines * to complete execution of the function. Different remote_calls may be handled * by different threads on the target machines and thus the target function * should be made thread-safe. Alternatively, see * distributed_control::set_sequentialization_key() to force sequentialization * of groups of remote_calls. A full_barrier() * may also be issued to wait for completion of all RPC calls issued prior * to the full barrier. * * Example: * \code * // A print function is defined in the distributed object * class distributed_obj_example { * graphlab::dc_dist_object<distributed_obj_example> rmi; * ... initialization and constructor ... * private: * void print(std::string s) { * std::cout << s << "\n"; * } * public: * void print_on_some_machines(std::string s) { * std::vector<procid_t> procs; * procs.push_back(1); procs.push_back(3); procs.push_back(5); * * // calls the print function on machine 1,3,5 with the argument "s" * rmi.remote_call(procs.begin(), procs.end(), * &distributed_obj_example::print, s); * } * } * \endcode * * * \param machine_begin The beginning of an iterator range containing a list * machines to call. Iterator::value_type must be * castable to procid_t. * \param machine_end The end of an iterator range containing a list * machines to call. Iterator::value_type must be * castable to procid_t. * \param fn The function to run on the target machine. Must be a pointer to * member function in the owning object. * \param ... The arguments to send to Fn. Arguments must be serializable. * and must be castable to the target types. 
*/ void remote_call(Iterator machine_begin, Iterator machine_end, Fn fn, ...); /** * \brief Performs a blocking RPC call to the target machine * to run the provided function pointer. * * remote_request() calls the function "fn" on a target remote machine. Provided * arguments are serialized and sent to the target. * Therefore, all arguments are necessarily transmitted by value. * If the target function has a return value, it is sent back to calling * machine. * * Unlike remote_call(), remote_request() is blocking and waits for the target * machine to complete execution of the function. However, different * remote_requests may be still be handled by different threads on the target * machine. * * Example: * \code * // A print function is defined in the distributed object * class distributed_obj_example { * graphlab::dc_dist_object<distributed_obj_example> rmi; * ... initialization and constructor ... * private: * int add_one(int i) { * return i + 1; * } * public: * int add_one_from_machine_1(int i) { * // calls the add_one function on machine 1 with the argument i * return rmi.remote_request(1, &distributed_obj_example::add_one, i); * } * } * \endcode * * \see graphlab::object_fiber_remote_request * graphlab::dc_dist_object::future_remote_request * * \param targetmachine The ID of the machine to run the function on * \param fn The function to run on the target machine. Must be a pointer to * member function in the owning object. * \param ... The arguments to send to Fn. Arguments must be serializable. * and must be castable to the target types. * * \returns Returns the same return type as the function fn */ RetVal remote_request(procid_t targetmachine, Fn fn, ...); /** * \brief Performs a nonblocking RPC call to the target machine * to run the provided function pointer which has an expected return value. * * future_remote_request() calls the function "fn" on a target remote machine. * Provided arguments are serialized and sent to the target. 
* Therefore, all arguments are necessarily transmitted by value. * If the target function has a return value, it is sent back to calling * machine. * * future_remote_request() is like remote_request(), but is non-blocking. * Instead, it returns immediately a \ref graphlab::request_future object * which will allow you wait for the return value. * * Example: * \code * // A print function is defined in the distributed object * class distributed_obj_example { * graphlab::dc_dist_object<distributed_obj_example> rmi; * ... initialization and constructor ... * private: * int add_one(int i) { * return i + 1; * } * public: * int add_one_from_machine_1(int i) { * // calls the add_one function on machine 1 with the argument i * // this call returns immediately * graphlab::request_future<int> future = * rmi.future_remote_request(1, &distributed_obj_example::add_one, i); * * // ... we can do other stuff here * // then when we want the answer * int result = future(); * return result; * } * } * \endcode * * \see graphlab::object_fiber_remote_request * graphlab::dc_dist_object::remote_request * * \param targetmachine The ID of the machine to run the function on * \param fn The function to run on the target machine. Must be a pointer to * member function in the owning object. * \param ... The arguments to send to Fn. Arguments must be serializable. * and must be castable to the target types. 
 *
 * \returns Returns a future templated around the same type as the return
 * value of the called function
 */
request_future<RetVal> future_remote_request(procid_t targetmachine, Fn fn, ...);

#endif

/*****************************************************************************
             Implementation of matched send_to / recv_from
 *****************************************************************************/
private:
// One slot per peer process: holds the payload handed over by a matching
// send_to() and the condition variable recv_from() blocks on.
std::vector<dc_impl::recv_from_struct> recv_froms;

// Target of the internal call issued by send_to(): deposits the serialized
// payload and the sender's reply tag into this process's slot for 'src',
// then wakes any recv_from() waiting on it.
void block_and_wait_for_recv(size_t src, std::string& str, size_t tag) {
  recv_froms[src].lock.lock();
  recv_froms[src].data = str;
  recv_froms[src].tag = tag;
  recv_froms[src].hasdata = true;
  recv_froms[src].cond.signal();
  recv_froms[src].lock.unlock();
}

public:
/** \copydoc distributed_control::send_to() */
template <typename U>
void send_to(procid_t target, U& t, bool control = false) {
  // serialize the payload
  std::stringstream strm;
  oarchive oarc(strm);
  oarc << t;
  strm.flush();
  dc_impl::basic_reply_container rt;
  // I shouldn't use a request to block here since
  // that will take up a thread on the remote side
  // so I simulate a request here: the address of 'rt' doubles as the
  // reply tag the receiver uses to release us.
  size_t rtptr = reinterpret_cast<size_t>(&rt);
  if (control == false) {
    internal_call(target, &dc_dist_object<T>::block_and_wait_for_recv,
                  procid(), strm.str(), rtptr);
  } else {
    internal_control_call(target, &dc_dist_object<T>::block_and_wait_for_recv,
                          procid(), strm.str(), rtptr);
  }
  // wait for reply: released when the receiver's recv_from() issues
  // request_reply_handler with our tag
  rt.wait();
  if (control == false) inc_calls_sent(target);
}

/** \copydoc distributed_control::recv_from() */
template <typename U>
void recv_from(procid_t source, U& t, bool control = false) {
  // wait on the condition variable until I have data
  dc_impl::recv_from_struct &recvstruct = recv_froms[source];
  recvstruct.lock.lock();
  while (recvstruct.hasdata == false) {
    recvstruct.cond.wait(recvstruct.lock);
  }
  // got the data. deserialize it
  std::stringstream strm(recvstruct.data);
  iarchive iarc(strm);
  iarc >> t;
  // clear the data
  std::string("").swap(recvstruct.data);
  // remember the tag so we can unlock it before the remote call
  size_t tag = recvstruct.tag;
  // clear the has data flag
  recvstruct.hasdata = false;
  // unlock
  recvstruct.lock.unlock();
  if (control == false) {
    // remote call to release the sender. Use an empty blob
    dc_.control_call(source, request_reply_handler, tag, dc_impl::blob());
    // I have to increment the calls received manually here
    // since the matched send/recv calls do not go through the
    // typical object calls. It goes through the DC, but I also want to charge
    // it to this object
    inc_calls_received(source);
  } else {
    dc_.control_call(source, request_reply_handler, tag, dc_impl::blob());
  }
}

/*****************************************************************************
                      Implementation of Broadcast
 *****************************************************************************/
private:
// serialized broadcast payload, written remotely via set_broadcast_receive()
std::string broadcast_receive;

// Target of the internal request issued by the broadcast originator.
void set_broadcast_receive(const std::string &s) {
  broadcast_receive = s;
}

public:
/// \copydoc distributed_control::broadcast()
template <typename U>
void broadcast(U& data, bool originator, bool control = false) {
  if (originator) {
    // construct the data stream
    std::stringstream strm;
    oarchive oarc(strm);
    oarc << data;
    strm.flush();
    broadcast_receive = strm.str();
    // blocking requests ensure every machine has stored the payload
    // before the originator proceeds
    if (control == false) {
      for (size_t i = 0;i < numprocs(); ++i) {
        if (i != procid()) {
          internal_request(i, &dc_dist_object<T>::set_broadcast_receive, broadcast_receive);
        }
      }
    } else {
      for (size_t i = 0;i < numprocs(); ++i) {
        if (i != procid()) {
          internal_control_request(i, &dc_dist_object<T>::set_broadcast_receive, broadcast_receive);
        }
      }
    }
  }
  // by the time originator gets here, all machines
  // will have received the data due to the broadcast_receive
  // set a barrier here.
  barrier();
  // all machines will now deserialize the data
  if (!originator) {
    std::stringstream strm(broadcast_receive);
    iarchive iarc(strm);
    iarc >> data;
  }
  barrier();
}

/*****************************************************************************
                  Implementation of Gather, all_gather
 *****************************************************************************/
private:
// serialized contributions, one slot per source process
std::vector<std::string> gather_receive;
// generation counter: guards against a fast sender writing into the next
// gather round before this process has consumed the current one
atomic<size_t> gatherid;

// Target of the internal request issued by non-root gather() participants.
void set_gather_receive(procid_t source, const std::string &s, size_t gid) {
  // spin until the local process has entered the same gather round
  while(gatherid.value != gid) sched_yield();
  gather_receive[source] = s;
}

public:
/// \copydoc distributed_control::gather()
template <typename U>
void gather(std::vector<U>& data, procid_t sendto, bool control = false) {
  // if not root
  if (sendto != procid()) {
    std::stringstream strm( std::ios::out | std::ios::binary );
    oarchive oarc(strm);
    oarc << data[procid()];
    strm.flush();
    if (control == false) {
      internal_request(sendto, &dc_dist_object<T>::set_gather_receive,
                       procid(), strm.str(), gatherid.value);
    } else {
      internal_control_request(sendto, &dc_dist_object<T>::set_gather_receive,
                               procid(), strm.str(), gatherid.value);
    }
  }
  barrier();
  if (sendto == procid()) {
    // if I am the receiver
    for (procid_t i = 0; i < numprocs(); ++i) {
      if (i != procid()) {
        // receiving only from others
        std::stringstream strm(gather_receive[i], std::ios::in | std::ios::binary);
        assert(strm.good());
        iarchive iarc(strm);
        iarc >> data[i];
      }
    }
  }
  gatherid.inc();
  barrier();
}

/********************************************************************
                  Implementation of all gather
*********************************************************************/
private:
// ------- Sense reversing barrier data ----------
/// The next value of the barrier. either +1 or -1
int ab_barrier_sense;
/// When this flag == the current barrier value. The barrier is complete
int ab_barrier_release;
/** when barrier sense is 1, barrier clears when
 * child_barrier_counter == numchild.
When barrier sense is -1, barrier
 * clears when child_barrier_counter == 0; */
atomic<int> ab_child_barrier_counter;
/// condition variable and mutex protecting the barrier variables
fiber_conditional ab_barrier_cond;
mutex ab_barrier_mut;
// serialized data received from each direct child on the upward pass
std::string ab_children_data[BARRIER_BRANCH_FACTOR];
// concatenated data for the whole tree, distributed on the downward pass
std::string ab_alldata;

/** The child calls this function in the parent
 * once the child enters the barrier */
void __ab_child_to_parent_barrier_trigger(procid_t source, std::string collect) {
  ab_barrier_mut.lock();
  // assert childbase <= source <= childbase + BARRIER_BRANCH_FACTOR
  ASSERT_GE(source, childbase);
  ASSERT_LT(source, childbase + BARRIER_BRANCH_FACTOR);
  ab_children_data[source - childbase] = collect;
  ab_child_barrier_counter.inc(ab_barrier_sense);
  ab_barrier_cond.signal();
  ab_barrier_mut.unlock();
}

/** This is on the downward pass of the barrier. The parent calls this
 * function to release all the children's barriers */
void __ab_parent_to_child_barrier_release(int releaseval,
                                          std::string allstrings,
                                          int use_control_calls) {
  // send the release downwards
  // get my largest child
  logger(LOG_DEBUG, "AB Barrier Release %d", releaseval);
  ab_alldata = allstrings;
  for (procid_t i = 0;i < numchild; ++i) {
    if (use_control_calls) {
      internal_control_call((procid_t)(childbase + i),
                            &dc_dist_object<T>::__ab_parent_to_child_barrier_release,
                            releaseval, ab_alldata, use_control_calls);
    } else {
      internal_call((procid_t)(childbase + i),
                    &dc_dist_object<T>::__ab_parent_to_child_barrier_release,
                    releaseval, ab_alldata, use_control_calls);
    }
  }
  ab_barrier_mut.lock();
  ab_barrier_release = releaseval;
  ab_barrier_cond.signal();
  ab_barrier_mut.unlock();
}

public:
/// \copydoc distributed_control::all_gather()
template <typename U>
void all_gather(std::vector<U>& data, bool control = false) {
  if (numprocs() == 1) return;
  // get the string representation of the data
  charstream strm(128);
  oarchive oarc(strm);
  oarc << data[procid()];
  strm.flush();
  // upward message
  int ab_barrier_val = ab_barrier_sense;
  ab_barrier_mut.lock();
  // wait for all children to be done
  while(1) {
    if ((ab_barrier_sense == -1 && ab_child_barrier_counter.value == 0) ||
        (ab_barrier_sense == 1 && ab_child_barrier_counter.value == (int)(numchild))) {
      // flip the barrier sense
      ab_barrier_sense = -ab_barrier_sense;
      // call child to parent in parent
      ab_barrier_mut.unlock();
      if (procid() != 0) {
        // collect all my children data
        charstream strstrm(128);
        oarchive oarc2(strstrm);
        oarc2 << std::string(strm->c_str(), strm->size());
        for (procid_t i = 0;i < numchild; ++i) {
          strstrm.write(ab_children_data[i].c_str(), ab_children_data[i].length());
        }
        strstrm.flush();
        if (control) {
          internal_control_call(parent,
                                &dc_dist_object<T>::__ab_child_to_parent_barrier_trigger,
                                procid(), std::string(strstrm->c_str(), strstrm->size()));
        } else {
          internal_call(parent,
                        &dc_dist_object<T>::__ab_child_to_parent_barrier_trigger,
                        procid(), std::string(strstrm->c_str(), strstrm->size()));
        }
      }
      break;
    }
    ab_barrier_cond.wait(ab_barrier_mut);
  }
  logger(LOG_DEBUG, "AB barrier phase 1 complete");
  // I am root. send the barrier release downwards
  if (procid() == 0) {
    ab_barrier_release = ab_barrier_val;
    // build the downward data
    charstream strstrm(128);
    oarchive oarc2(strstrm);
    oarc2 << std::string(strm->c_str(), strm->size());
    for (procid_t i = 0;i < numchild; ++i) {
      strstrm.write(ab_children_data[i].c_str(), ab_children_data[i].length());
    }
    strstrm.flush();
    ab_alldata = std::string(strstrm->c_str(), strstrm->size());
    for (procid_t i = 0;i < numchild; ++i) {
      logger(LOG_DEBUG, "Sending AB release to %d", childbase + i);
      internal_control_call((procid_t)(childbase + i),
                            &dc_dist_object<T>::__ab_parent_to_child_barrier_release,
                            ab_barrier_val, ab_alldata, (int)control);
    }
  }
  // wait for the downward message releasing the barrier
  logger(LOG_DEBUG, "AB barrier waiting for %d", ab_barrier_val);
  ab_barrier_mut.lock();
  while(1) {
    if (ab_barrier_release == ab_barrier_val) break;
    ab_barrier_cond.wait(ab_barrier_mut);
  }
  // read the collected data and release the lock
  std::string local_ab_alldata = ab_alldata;
  ab_barrier_mut.unlock();
  logger(LOG_DEBUG, "barrier phase 2 complete");
  // now the data is a DFS search of a heap
  // I need to unpack it
  size_t heappos = 0;
  std::stringstream istrm(local_ab_alldata);
  iarchive iarc(istrm);
  for (size_t i = 0;i < numprocs(); ++i) {
    std::string s;
    iarc >> s;
    std::stringstream strm2(s);
    iarchive iarc2(strm2);
    iarc2 >> data[heappos];
    if (i + 1 == numprocs()) break;
    // advance heappos
    // leftbranch
    bool lefttraverseblock = false;
    while (1) {
      // can we continue going deeper down the left?
      size_t leftbranch = heappos * BARRIER_BRANCH_FACTOR + 1;
      if (lefttraverseblock == false && leftbranch < numprocs()) {
        heappos = leftbranch;
        break;
      }
      // ok. can't go down the left
      bool this_is_a_right_branch =
          (((heappos - 1) % BARRIER_BRANCH_FACTOR) == BARRIER_BRANCH_FACTOR - 1);
      // if we are a left branch, go to sibling
      if (this_is_a_right_branch == false) {
        size_t sibling = heappos + 1;
        if (sibling < numprocs()) {
          heappos = sibling;
          break;
        }
      }
      // we have finished this subtree, go back up to parent
      // and block the depth traversal on the next round
      // unless heappos is 0
      heappos = (heappos - 1) / BARRIER_BRANCH_FACTOR;
      lefttraverseblock = true;
      continue;
      // go to sibling
    }
  }
}

/// \copydoc distributed_control::all_reduce2()
template <typename U, typename PlusEqual>
void all_reduce2(U& data, PlusEqual plusequal, bool control = false) {
  if (numprocs() == 1) return;
  // get the string representation of the data
  /* charstream strm(128);
  oarchive oarc(strm);
  oarc << data;
  strm.flush();*/
  // upward message
  int ab_barrier_val = ab_barrier_sense;
  ab_barrier_mut.lock();
  // wait for all children to be done
  while(1) {
    if ((ab_barrier_sense == -1 && ab_child_barrier_counter.value == 0) ||
        (ab_barrier_sense == 1 && ab_child_barrier_counter.value == (int)(numchild))) {
      // flip the barrier sense
      ab_barrier_sense = -ab_barrier_sense;
      // call child to parent in parent
      ab_barrier_mut.unlock();
      if (procid() != 0) {
        // accumulate my children data
        for (procid_t i = 0;i < numchild; ++i) {
          std::stringstream istrm(ab_children_data[i]);
          iarchive iarc(istrm);
          U tmp;
          iarc >> tmp;
          plusequal(data, tmp);
        }
        // upward message
        charstream ostrm(128);
        oarchive oarc(ostrm);
        oarc << data;
        ostrm.flush();
        if (control) {
          internal_control_call(parent,
                                &dc_dist_object<T>::__ab_child_to_parent_barrier_trigger,
                                procid(), std::string(ostrm->c_str(), ostrm->size()));
        } else {
          internal_call(parent,
                        &dc_dist_object<T>::__ab_child_to_parent_barrier_trigger,
                        procid(), std::string(ostrm->c_str(), ostrm->size()));
        }
      }
      break;
    }
    ab_barrier_cond.wait(ab_barrier_mut);
  }
  logger(LOG_DEBUG, "AB barrier phase 1 complete");
  // I am root. send the barrier release downwards
  if (procid() == 0) {
    ab_barrier_release = ab_barrier_val;
    // fold in the partial reductions received from my children
    for (procid_t i = 0;i < numchild; ++i) {
      std::stringstream istrm(ab_children_data[i]);
      iarchive iarc(istrm);
      U tmp;
      iarc >> tmp;
      plusequal(data, tmp);
    }
    // build the downward data
    charstream ostrm(128);
    oarchive oarc(ostrm);
    oarc << data;
    ostrm.flush();
    ab_alldata = std::string(ostrm->c_str(), ostrm->size());
    for (procid_t i = 0;i < numchild; ++i) {
      internal_control_call((procid_t)(childbase + i),
                            &dc_dist_object<T>::__ab_parent_to_child_barrier_release,
                            ab_barrier_val, ab_alldata, (int)control);
    }
  }
  // wait for the downward message releasing the barrier
  logger(LOG_DEBUG, "AB barrier waiting for %d", ab_barrier_val);
  ab_barrier_mut.lock();
  while(1) {
    if (ab_barrier_release == ab_barrier_val) break;
    ab_barrier_cond.wait(ab_barrier_mut);
  }
  if (procid() != 0) {
    // read the collected data and release the lock
    std::string local_ab_alldata = ab_alldata;
    ab_barrier_mut.unlock();
    logger(LOG_DEBUG, "barrier phase 2 complete");
    std::stringstream istrm(local_ab_alldata);
    iarchive iarc(istrm);
    iarc >> data;
  } else {
    // root already holds the final reduction
    ab_barrier_mut.unlock();
  }
}

// Default reduction used by all_reduce(): element-wise operator+=.
template <typename U>
struct default_plus_equal {
  void operator()(U& u, const U& v) {
    u += v;
  }
};

/// \copydoc distributed_control::all_reduce()
template <typename U>
void all_reduce(U& data, bool control = false) {
  all_reduce2(data, default_plus_equal<U>(), control);
}

////////////////////////////////////////////////////////////////////////////

/*****************************************************************************
                  Implementation of All Scatter
 *****************************************************************************/
template <typename U>
void all_to_all(std::vector<U>& data, bool control = false) {
  ASSERT_EQ(data.size(), numprocs());
  // exchange serialized slots pairwise; reuses the gather receive buffers
  for (size_t i = 0;i < data.size(); ++i) {
    if (i != procid()) {
      std::stringstream strm( std::ios::out | std::ios::binary );
      oarchive oarc(strm);
      oarc << data[i];
      strm.flush();
      if (control == false) {
        internal_call(i, &dc_dist_object<T>::set_gather_receive,
                      procid(), strm.str(), gatherid.value);
      } else {
        internal_control_call(i, &dc_dist_object<T>::set_gather_receive,
                              procid(), strm.str(), gatherid.value);
      }
    }
  }
  // fire-and-forget calls above, so wait until all of them have completed
  full_barrier();
  for (size_t i = 0; i < data.size(); ++i) {
    if (i != procid()) {
      std::stringstream strm(gather_receive[i], std::ios::in | std::ios::binary);
      assert(strm.good());
      iarchive iarc(strm);
      iarc >> data[i];
    }
  }
  gatherid.inc();
  barrier();
}

/*****************************************************************************
                  Implementation of Barrier
 *****************************************************************************/
private:
// ------- Sense reversing barrier data ----------
/// The next value of the barrier. either +1 or -1
int barrier_sense;
/// When this flag == the current barrier value. The barrier is complete
int barrier_release;
/** when barrier sense is 1, barrier clears when
 * child_barrier_counter == numchild. When barrier sense is -1, barrier
 * clears when child_barrier_counter == 0; */
atomic<int> child_barrier_counter;
/// condition variable and mutex protecting the barrier variables
fiber_conditional barrier_cond;
mutex barrier_mut;
procid_t parent;  /// parent node
size_t childbase; /// id of my first child
procid_t numchild;  /// number of children

/** The child calls this function in the parent
 * once the child enters the barrier */
void __child_to_parent_barrier_trigger(procid_t source) {
  barrier_mut.lock();
  // assert childbase <= source <= childbase + BARRIER_BRANCH_FACTOR
  ASSERT_GE(source, childbase);
  ASSERT_LT(source, childbase + BARRIER_BRANCH_FACTOR);
  child_barrier_counter.inc(barrier_sense);
  barrier_cond.signal();
  barrier_mut.unlock();
}

/** This is on the downward pass of the barrier.
The parent calls this function to release all the children's barriers */ void __parent_to_child_barrier_release(int releaseval) { // send the release downwards // get my largest child logger(LOG_DEBUG, "Barrier Release %d", releaseval); for (procid_t i = 0;i < numchild; ++i) { internal_control_call((procid_t)(childbase + i), &dc_dist_object<T>::__parent_to_child_barrier_release, releaseval); } barrier_mut.lock(); barrier_release = releaseval; barrier_cond.signal(); barrier_mut.unlock(); } public: /// \copydoc distributed_control::barrier() void barrier() { // upward message int barrier_val = barrier_sense; barrier_mut.lock(); // wait for all children to be done while(1) { if ((barrier_sense == -1 && child_barrier_counter.value == 0) || (barrier_sense == 1 && child_barrier_counter.value == (int)(numchild))) { // flip the barrier sense barrier_sense = -barrier_sense; // call child to parent in parent barrier_mut.unlock(); if (procid() != 0) { internal_control_call(parent, &dc_dist_object<T>::__child_to_parent_barrier_trigger, procid()); } break; } barrier_cond.wait(barrier_mut); } logger(LOG_DEBUG, "barrier phase 1 complete"); // I am root. 
send the barrier release downwards if (procid() == 0) { barrier_release = barrier_val; for (procid_t i = 0;i < numchild; ++i) { internal_control_call((procid_t)(childbase + i), &dc_dist_object<T>::__parent_to_child_barrier_release, barrier_val); } } // wait for the downward message releasing the barrier logger(LOG_DEBUG, "barrier waiting for %d", barrier_val); barrier_mut.lock(); while(1) { if (barrier_release == barrier_val) break; barrier_cond.wait(barrier_mut); } barrier_mut.unlock(); logger(LOG_DEBUG, "barrier phase 2 complete"); } /***************************************************************************** Implementation of Full Barrier *****************************************************************************/ private: mutex full_barrier_lock; fiber_conditional full_barrier_cond; std::vector<size_t> calls_to_receive; // used to inform the counter that the full barrier // is in effect and all modifications to the calls_recv // counter will need to lock and signal volatile bool full_barrier_in_effect; /** number of 'source' processor counts which have not achieved the right recv count */ atomic<size_t> num_proc_recvs_incomplete; /// Marked as 1 if the proc is complete dense_bitset procs_complete; public: /// \copydoc distributed_control::full_barrier() void full_barrier() { // gather a sum of all the calls issued to machine 0 std::vector<size_t> calls_sent_to_target(numprocs(), 0); for (size_t i = 0;i < numprocs(); ++i) { calls_sent_to_target[i] = callssent[i].value; } // tell node 0 how many calls there are std::vector<std::vector<size_t> > all_calls_sent(numprocs()); all_calls_sent[procid()] = calls_sent_to_target; all_gather(all_calls_sent, true); // get the number of calls I am supposed to receive from each machine calls_to_receive.clear(); calls_to_receive.resize(numprocs(), 0); for (size_t i = 0;i < numprocs(); ++i) { calls_to_receive[i] += all_calls_sent[i][procid()]; } // clear the counters num_proc_recvs_incomplete.value = numprocs(); 
procs_complete.clear(); // activate the full barrier full_barrier_in_effect = true; __asm("mfence"); // begin one pass to set all which are already completed for (size_t i = 0;i < numprocs(); ++i) { if (callsreceived[i].value >= calls_to_receive[i]) { if (procs_complete.set_bit(i) == false) { num_proc_recvs_incomplete.dec(); } } else { logstream(LOG_DEBUG) << "Expecting " << calls_to_receive[i] << " calls from " << i << " but only " << callsreceived[i].value << "received." << std::endl; } } full_barrier_lock.lock(); while (num_proc_recvs_incomplete.value > 0) { logstream(LOG_DEBUG) << "Calls Incomplete. Waiting." << std::endl; full_barrier_cond.wait(full_barrier_lock); } full_barrier_lock.unlock(); full_barrier_in_effect = false; // for (size_t i = 0; i < numprocs(); ++i) { // std::cout << "Received " << global_calls_received[i].value << " from " << i << std::endl; // } barrier(); } /* -------------------- Implementation of Gather Statistics -----------------*/ private: struct collected_statistics { size_t callssent; size_t bytessent; collected_statistics(): callssent(0), bytessent(0) { } void save(oarchive &oarc) const { oarc << callssent << bytessent; } void load(iarchive &iarc) { iarc >> callssent >> bytessent; } }; public: /** Gather RPC statistics. All machines must call this function at the same time. 
However, only proc 0 will return values */ std::map<std::string, size_t> gather_statistics() { std::map<std::string, size_t> ret; std::vector<collected_statistics> stats(numprocs()); stats[procid()].callssent = calls_sent(); stats[procid()].bytessent = bytes_sent(); logstream(LOG_INFO) << procid() << ": calls_sent: "; for (size_t i = 0;i < numprocs(); ++i) { logstream(LOG_INFO) << callssent[i].value << ", "; } logstream(LOG_INFO) << std::endl; logstream(LOG_INFO) << procid() << ": calls_recv: "; for (size_t i = 0;i < numprocs(); ++i) { logstream(LOG_INFO) << callsreceived[i].value << ", "; } logstream(LOG_INFO) << std::endl; gather(stats, 0, true); if (procid() == 0) { collected_statistics cs; for (size_t i = 0;i < numprocs(); ++i) { cs.callssent += stats[i].callssent; cs.bytessent += stats[i].bytessent; } ret["total_calls_sent"] = cs.callssent; ret["total_bytes_sent"] = cs.bytessent; } return ret; } }; #include <graphlab/macros_undef.hpp> #include <graphlab/rpc/mem_function_arg_types_undef.hpp> #undef BARRIER_BRANCH_FACTOR }// namespace graphlab #endif ================================================ FILE: src/graphlab/rpc/dc_dist_object_base.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_DC_DIST_OBJECT_BASE_HPP
#define GRAPHLAB_DC_DIST_OBJECT_BASE_HPP
#include <vector>
#include <graphlab/rpc/dc_internal_types.hpp>

namespace graphlab {
namespace dc_impl {

/**
 * \ingroup rpc
 * \internal
 * Abstract interface for extracting and updating the per-object
 * call/byte counters of a dc_dist_object. The RPC runtime holds
 * registered objects through this base so it can update statistics
 * without knowing the concrete dc_dist_object<T> instantiation.
 */
class dc_dist_object_base{
 public:
  virtual ~dc_dist_object_base() { }

  /// Increment the number of calls sent from this object.
  /// \param source the machine the call was sent to
  ///        (NOTE(review): the name "source" here appears to denote the
  ///        remote endpoint — confirm against the implementing class)
  virtual void inc_calls_sent(procid_t source) = 0;

  /// Increment the number of calls received by this object.
  /// \param dest the machine the call originated from
  virtual void inc_calls_received(procid_t dest) = 0;

  /// Increment the number of bytes sent from this object to \p target.
  virtual void inc_bytes_sent(procid_t target, size_t bytes) = 0;

  /// Return the total number of calls received by this object.
  virtual size_t calls_received() const = 0;

  /// Return the total number of calls sent from this object.
  virtual size_t calls_sent() const = 0;
};

}
}
#endif


================================================
FILE: src/graphlab/rpc/dc_init_from_env.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <cstdio> #include <cstdlib> #include <string> #include <graphlab/rpc/dc.hpp> #include <graphlab/rpc/dc_init_from_env.hpp> #include <graphlab/util/stl_util.hpp> #include <graphlab/logger/logger.hpp> namespace graphlab { bool init_param_from_env(dc_init_param& param) { char* nodeid = getenv("SPAWNID"); if (nodeid == NULL) { return false; } param.curmachineid = atoi(nodeid); char* nodes = getenv("SPAWNNODES"); std::string nodesstr = nodes; if (nodes == NULL) { return false; } param.machines = strsplit(nodesstr, ","); for (size_t i = 0;i < param.machines.size(); ++i) { param.machines[i] = param.machines[i] + ":" + tostr(10000 + i); } // set defaults param.numhandlerthreads = RPC_DEFAULT_NUMHANDLERTHREADS; param.commtype = RPC_DEFAULT_COMMTYPE; return true; } } // namespace graphlab ================================================ FILE: src/graphlab/rpc/dc_init_from_env.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_DC_INIT_FROM_ENV_HPP #define GRAPHLAB_DC_INIT_FROM_ENV_HPP #include <graphlab/rpc/dc.hpp> namespace graphlab { /** * \ingroup rpc * initializes parameters from environment. 
Returns true on success */ bool init_param_from_env(dc_init_param& param); } #endif // GRAPHLAB_DC_INIT_FROM_ENV_HPP ================================================ FILE: src/graphlab/rpc/dc_init_from_mpi.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <iostream> #include <cstdio> #include <cstdlib> #include <string> #include <graphlab/rpc/dc.hpp> #include <graphlab/rpc/dc_init_from_mpi.hpp> #include <graphlab/util/stl_util.hpp> #include <graphlab/util/net_util.hpp> #include <graphlab/logger/logger.hpp> #ifdef HAS_MPI #include <graphlab/util/mpi_tools.hpp> #endif namespace graphlab { bool init_param_from_mpi(dc_init_param& param,dc_comm_type commtype) { #ifdef HAS_MPI ASSERT_MSG(commtype == TCP_COMM, "MPI initialization only supports TCP at the moment"); // Look for a free port to use. 
std::pair<size_t, int> port_and_sock = get_free_tcp_port(); size_t port = port_and_sock.first; int sock = port_and_sock.second; std::string ipaddr = get_local_ip_as_str(mpi_tools::rank() == 0 /* print stuff only if I am master */); ipaddr = ipaddr + ":" + tostr(port); // now do an allgather logstream(LOG_INFO) << "Will Listen on: " << ipaddr << std::endl; std::vector<std::string> machines; mpi_tools::all_gather(ipaddr, param.machines); // set defaults param.curmachineid = (procid_t)(mpi_tools::rank()); param.numhandlerthreads = RPC_DEFAULT_NUMHANDLERTHREADS; param.commtype = commtype; param.initstring = param.initstring + std::string(" __sockhandle__=") + tostr(sock) + " "; return true; #else std::cerr << "MPI Support not compiled!" << std::endl; exit(0); #endif } } // namespace graphlab ================================================ FILE: src/graphlab/rpc/dc_init_from_mpi.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_DC_INIT_FROM_MPI_HPP #define GRAPHLAB_DC_INIT_FROM_MPI_HPP #include <graphlab/rpc/dc.hpp> namespace graphlab { /** * \ingroup rpc * initializes parameters from MPI. 
 Returns true on success.
 MPI must be initialized before calling this function. */
bool init_param_from_mpi(dc_init_param& param,
                         dc_comm_type commtype = TCP_COMM);

}
#endif // GRAPHLAB_DC_INIT_FROM_MPI_HPP


================================================
FILE: src/graphlab/rpc/dc_init_from_zookeeper.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>
#include <algorithm>
#include <boost/bind.hpp>
#include <graphlab/rpc/dc.hpp>
#include <graphlab/zookeeper/server_list.hpp>
#include <graphlab/util/stl_util.hpp>
#include <graphlab/util/net_util.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/logger/logger.hpp>

namespace graphlab {

// Watch callback fired by the ZooKeeper server list whenever the
// membership of the "graphlab" namespace changes. Once the expected
// number of servers have joined, it publishes the final list into
// `result` (under `result_lock`), stops watching, and signals the
// waiter blocked in init_param_from_zookeeper.
void zk_callback(zookeeper::server_list* slist,
                 std::string name_space,
                 std::vector<std::string> servers,
                 std::vector<std::string>& result,
                 size_t num_to_watch_for,
                 mutex& result_lock,
                 conditional& result_cond) {
  if (servers.size() == num_to_watch_for) {
    result_lock.lock();
    result = servers;
    slist->stop_watching("graphlab");
    result_cond.signal();
    result_lock.unlock();
  }
}

// Rendezvous via ZooKeeper: every process joins the "graphlab"
// namespace with its ip:port identifier, then waits until ZK_NUMNODES
// members are visible. Reads ZK_SERVERS, ZK_JOBNAME, ZK_NUMNODES from
// the environment; returns false if any is unset.
bool init_param_from_zookeeper(dc_init_param& param) {
  char* zk_hosts = getenv("ZK_SERVERS");
  char* zk_jobname = getenv("ZK_JOBNAME");
  char* zk_numnodes = getenv("ZK_NUMNODES");
  if (zk_hosts == NULL || zk_jobname == NULL || zk_numnodes == NULL) {
    return false;
  }
  std::vector<std::string> zk_hosts_list = strsplit(zk_hosts, ",");
  // number of nodes to wait for
  size_t numnodes = atoi(zk_numnodes);
  ASSERT_GE(numnodes, 1);
  logstream(LOG_EMPH) << "Using Zookeeper for Initialization. Waiting for "
                      << numnodes << " to join" << std::endl;
  // generate a unique identifier for this server: grab a free TCP port
  // (the socket stays open and is handed off via __sockhandle__ below)
  std::pair<size_t, int> port_and_sock = get_free_tcp_port();
  size_t port = port_and_sock.first;
  int sock = port_and_sock.second;
  std::string ipaddr = get_local_ip_as_str(true);
  ipaddr = ipaddr + ":" + tostr(port);
  logstream(LOG_INFO) << "Will Listen on: " << ipaddr << std::endl;
  // get an ip address
  zookeeper::server_list server_list(zk_hosts_list, zk_jobname, ipaddr);
  // final server list goes here
  std::vector<std::string> received_servers;
  // lock/condition protecting the final server list
  mutex lock;
  conditional cond;
  // construct the watch to watch for changes on zookeeper; the callback
  // fills received_servers once all numnodes members are present
  server_list.set_callback(boost::bind(zk_callback, _1, _2, _3,
                                       boost::ref(received_servers),
                                       numnodes,
                                       boost::ref(lock),
                                       boost::ref(cond)));
  server_list.join("graphlab");
  lock.lock();
  // watch_changes returns the current membership; the set may already
  // be complete at this point, in which case we never wait
  received_servers = server_list.watch_changes("graphlab");
  // wait until I get all the servers
  // TODO: add a timeout
  while(received_servers.size() < numnodes) cond.wait(lock);
  lock.unlock();
  // done!
  // now make sure that everyone sees the server list in the same order
  ASSERT_EQ(received_servers.size(), numnodes);
  std::sort(received_servers.begin(), received_servers.end());
  // now fill the parameter list; my machine id is my position in the
  // sorted list (unique because the identifier embeds ip:port)
  param.machines = received_servers;
  param.curmachineid = std::find(received_servers.begin(),
                                 received_servers.end(),
                                 ipaddr) - received_servers.begin();
  ASSERT_LT(param.curmachineid, received_servers.size());
  param.numhandlerthreads = RPC_DEFAULT_NUMHANDLERTHREADS;
  param.commtype = RPC_DEFAULT_COMMTYPE;
  param.initstring = param.initstring + std::string(" __sockhandle__=") +
                     tostr(sock) + " ";
  // detach from the server list
  // now, this takes advantage of the Zookeeper feature that
  // every machine sees all changes in the same order.
  // i.e. At some point, everyone would have seen a complete server list.
  // Once that happens, everyone can leave.
  server_list.set_callback(NULL);
  server_list.leave("graphlab");
  return true;
}

} // namespace graphlab


================================================
FILE: src/graphlab/rpc/dc_init_from_zookeeper.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_DC_INIT_FROM_ZOOKEEPER_HPP
#define GRAPHLAB_DC_INIT_FROM_ZOOKEEPER_HPP
#include <graphlab/rpc/dc.hpp>
namespace graphlab {
/**
 * \ingroup rpc
 * initializes parameters from ZooKeeper. Returns true on success.
 * To initialize from Zookeeper, the following environment variables must be set
 *
 * ZK_SERVERS: A comma separated list of zookeeper servers. Port
 * number must be included.
 * ZK_JOBNAME: The name of the job to use. This must be unique to the cluster.
 *  i.e. no other job with the same name must run at the same time
 * ZK_NUMNODES: The number of processes to wait for
 */
bool init_param_from_zookeeper(dc_init_param& param);

}
#endif // GRAPHLAB_DC_INIT_FROM_ZOOKEEPER_HPP


================================================
FILE: src/graphlab/rpc/dc_internal_types.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef DC_INTERNAL_TYPES_HPP
#define DC_INTERNAL_TYPES_HPP
#include <boost/function.hpp>
#include <boost/unordered_map.hpp>
#include <graphlab/rpc/dc_types.hpp>
#include <graphlab/util/resizing_array_sink.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/serialization/serialization_includes.hpp>

namespace graphlab {
class distributed_control;

namespace dc_impl {

/**
 * \internal
 * \ingroup rpc
 * The type of the callback function used by the communications classes
 * when data is received. `tag` is an opaque pointer registered with the
 * comm layer, `src` the sending machine, and [buf, buf+len) the payload.
 */
typedef void (*comm_recv_callback_type)(void* tag, procid_t src,
                                        const char* buf, size_t len);

/**
 * \internal
 * \ingroup rpc
 * The type of the local function call dispatcher.
 * \see dispatch_type2
 */
typedef void (*dispatch_type)(distributed_control& dc, procid_t,
                              unsigned char, const char* data, size_t len);

/**
 * \internal
 * \ingroup rpc
 * A second type of the local function call dispatcher.
 * Currently only used by POD calls. TODO: to move all other call
 * systems to use dispatch2. (Note: the signature is currently identical
 * to dispatch_type.)
 * \see dispatch_type
 */
typedef void (*dispatch_type2)(distributed_control& dc, procid_t,
                               unsigned char, const char* data, size_t len);

/// Maps a function's string identifier to its local dispatcher.
typedef boost::unordered_map<std::string, dispatch_type> dispatch_map_type;

// comm capabilities
const size_t COMM_STREAM = 1;
const size_t COMM_DATAGRAM = 0;

/**
 * \internal
 * \ingroup rpc
 * The header form of each packet. This struct is written directly onto
 * the wire, so its layout must not change.
 */
struct packet_hdr {
  uint32_t len;                          /// length of the packet
  procid_t src;                          /// source machine
  unsigned char packet_type_mask;        /// the types are in dc_packet_mask.hpp
  unsigned char sequentialization_key;
};

typedef uint32_t block_header_type;

/**
 * \internal
 * \ingroup rpc
 * special handling for the only pointer datatype we natively support
 * serialization for. Basically, we must delete it. If charstring_free
 * is called on a char*, it will be deleted. Otherwise it will not do
 * anything.
 */
template <typename T>
inline void charstring_free(T& t) { }

/**
 * \internal
 * \ingroup rpc
 * Specialization: a deserialized char* owns a heap array and must be
 * released with delete[].
 */
template <>
inline void charstring_free<char*>(char* &c){
  delete [] c;
};

/**
 * \internal
 * \ingroup rpc
 *
 * The data needed to receive the matched send / recvs.
 * The receiver blocks on `cond` until `hasdata` becomes true.
 */
struct recv_from_struct {
  inline recv_from_struct():tag(0), hasdata(false) { }

  std::string data;     // serialized payload
  size_t tag;           // matching tag for the send/recv pair
  mutex lock;           // protects data/hasdata
  conditional cond;     // signaled when data arrives
  bool hasdata;         // true once data is valid
};

/**
 * \internal
 * \ingroup rpc
 * Used for termination detection: a snapshot of the global sent/received
 * call counts circulated between machines.
 */
struct terminator_token {
  terminator_token():calls_sent(0),calls_recv(0),terminate(false) { }
  terminator_token(size_t sent, size_t recv):calls_sent(sent),
                                             calls_recv(recv),
                                             terminate(false) { }
  size_t calls_sent;
  size_t calls_recv;
  bool terminate;    // set when the ring agrees that termination is reached
};

/**
 * Used to maintain a singly-linked list of send buffers.
 */
struct buffer_elem {
  char* buf;
  size_t len;
  buffer_elem* next;
};

}
}
SERIALIZABLE_POD(graphlab::dc_impl::terminator_token);
#endif


================================================
FILE: src/graphlab/rpc/dc_packet_mask.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef DC_PACKET_MASK_HPP
#define DC_PACKET_MASK_HPP
namespace graphlab {

// --------- Packet header types --------------
// These are single-bit flags OR-ed into packet_hdr::packet_type_mask.
// (The gaps between the values — 1, 16, 64 — presumably leave room for
// other flags defined elsewhere; confirm before reusing those bits.)

/**
 * \internal
 * \ingroup rpc
 * Used for regular calls which go into a thread pool
 * for evaluation.
 */
const unsigned char STANDARD_CALL = 1;

/**
 * \internal \ingroup rpc
 * If control packet flag is set, this packet does not increment any
 * counters.
 */
const unsigned char CONTROL_PACKET = 16;

/**
 * \internal
 * \ingroup rpc
 *
 * Used to identify that after sending this
 * packet, a flush is required.
 */
const unsigned char FLUSH_PACKET = 64;

}
#endif


================================================
FILE: src/graphlab/rpc/dc_receive.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef DC_RECEIVE_HPP
#define DC_RECEIVE_HPP
#include <graphlab/rpc/dc_internal_types.hpp>
#include <graphlab/rpc/dc_types.hpp>
#include <graphlab/parallel/atomic.hpp>
namespace graphlab {
namespace dc_impl {

/**
\ingroup rpc
\internal
Base class of the data receiving class.
This class forms the receiving side of a "multiplexer".
Data entering from a single socket will be passed to this function
through the incoming_data function call.
This class must understand the packet header and issue the right calls
in the owning dc.
*/
class dc_receive {
 public:
  dc_receive() { };
  virtual ~dc_receive() { };

  /**
   * Gets a buffer for receiving data. The buffer length is returned in
   * retbuflength. If get_buffer() or advance_buffer() is called,
   * incoming_data will never be called.
   */
  virtual char* get_buffer(size_t& retbuflength) = 0;

  /**
   * Commits a buffer obtained using get_buffer.
   * c will be the result of a previous call to get_buffer() or
   * advance_buffer(). This function should commit a range of bytes
   * starting at c, up to 'wrotelength' bytes. A new empty buffer should
   * be returned and its size returned in retbuflength.
   */
  virtual char* advance_buffer(char* c, size_t wrotelength,
                               size_t& retbuflength) = 0;

  /**
   * Last call sent to any instance of dc_receive.
   * If the sender multithreads, the sending thread must shut down.
   */
  virtual void shutdown() = 0;
};

} // namespace dc_impl
} // namespace graphlab
#endif


================================================
FILE: src/graphlab/rpc/dc_send.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef DC_SEND_HPP
#define DC_SEND_HPP
#include <sys/types.h>
#include <sys/socket.h>
#include <iostream>
#include <graphlab/rpc/circular_iovec_buffer.hpp>
#include <graphlab/rpc/dc_internal_types.hpp>
#include <graphlab/rpc/thread_local_send_buffer.hpp>
#include <graphlab/rpc/dc_types.hpp>
namespace graphlab {
namespace dc_impl {

/**
\ingroup rpc
\internal
Base class of the data sending class.
This class forms the sending side of a "multiplexer".
send_data() will be called with a packet mask as well as a character
stream containing the contents of the packet. The class should
accumulate the data in an iovec structure and relinquish it on
get_outgoing_data().
*/
class dc_send{
 public:
  dc_send() { }
  virtual ~dc_send() { }

  /// Registers a thread-local send buffer with this sender.
  virtual void register_send_buffer(thread_local_buffer* buffer) = 0;
  /// Unregisters a previously registered thread-local send buffer.
  virtual void unregister_send_buffer(thread_local_buffer* buffer) = 0;

  /**
   * Bytes sent must be incremented BEFORE the data is transmitted.
   * Packets marked CONTROL_PACKET should not be counted.
   */
  virtual size_t bytes_sent() = 0;

  /**
   * flushes immediately
   */
  virtual void flush() = 0;

  /**
   * Requests a flush as soon as possible
   */
  virtual void flush_soon() = 0;

  /**
   * Writes a string to an internal buffer to be flushed later.
   * This is a "slow path" to be used only when the thread local buffer
   * is not available.
   */
  virtual void write_to_buffer(char* c, size_t len) = 0;

  /// Sets an implementation-defined option; the base returns 0 (ignored).
  virtual size_t set_option(std::string opt, size_t val) {
    return 0;
  }

  /**
   * Returns length if there is data, 0 otherwise. This function
   * must be reentrant, but it is guaranteed that only one thread will
   * call this function at anytime.
   */
  virtual size_t get_outgoing_data(circular_iovec_buffer& outdata) = 0;

  /**
   * Utility function: writes a packet header into an archive,
   * but returns an offset to the location of the length entry allowing
   * it to be filled in later (len is written as 0 here and patched once
   * the payload size is known).
   */
  inline static size_t write_packet_header(oarchive& oarc,
                                           procid_t src,
                                           unsigned char packet_type_mask,
                                           unsigned char sequentialization_key) {
    size_t base = oarc.off;
    // reserve space for the header, then fill it in place
    oarc.advance(sizeof(packet_hdr));
    packet_hdr* hdr =
        reinterpret_cast<packet_hdr*>(oarc.buf + (oarc.off - sizeof(packet_hdr)));
    hdr->len = 0;
    hdr->src = src;
    hdr->packet_type_mask = packet_type_mask;
    hdr->sequentialization_key = sequentialization_key;
    return base;
  }
};

} // namespace dc_impl
} // namespace graphlab
#endif


================================================
FILE: src/graphlab/rpc/dc_services.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#include <graphlab/rpc/dc_dist_object.hpp>

#ifndef GRAPHLAB_DC_SERVICES_HPP
#define GRAPHLAB_DC_SERVICES_HPP
#include <graphlab/parallel/pthread_tools.hpp>

#include <graphlab/macros_def.hpp>
namespace graphlab {

/**
\internal
\ingroup rpc
Creates a new context for MPI-like global global operations.
Where all machines create an instance of dc_services at the same time,
operations performed by the new dc_services instance will not interfere
and will run in parallel with other contexts. i.e. If I have two
distributed dc_services instances, one instance can perform a barrier
while another instance performs a broadcast() at the same time.
  */
  class dc_services {
   private:
    // The RMI object supplying every collective primitive this class forwards to.
    dc_dist_object<dc_services> rmi;

   public:
    dc_services(distributed_control &dc):rmi(dc, this) { }

    /// Returns the underlying dc_dist_object
    dc_dist_object<dc_services>& rmi_instance() {
      return rmi;
    }

    /// Returns the underlying dc_dist_object
    const dc_dist_object<dc_services>& rmi_instance() const {
      return rmi;
    }

    /** \copydoc distributed_control::send_to() */
    template <typename U>
    inline void send_to(procid_t target, U& t, bool control = false) {
      rmi.send_to(target, t, control);
    }

    /** \copydoc distributed_control::recv_from() */
    template <typename U>
    inline void recv_from(procid_t source, U& t, bool control = false) {
      rmi.recv_from(source, t, control);
    }

    /** \copydoc distributed_control::broadcast() */
    template <typename U>
    inline void broadcast(U& data, bool originator, bool control = false) {
      rmi.broadcast(data, originator, control);
    }

    /** \copydoc distributed_control::gather() */
    template <typename U>
    inline void gather(std::vector<U>& data, procid_t sendto, bool control = false) {
      rmi.gather(data, sendto, control);
    }

    /** \copydoc distributed_control::all_gather() */
    template <typename U>
    inline void all_gather(std::vector<U>& data, bool control = false) {
      rmi.all_gather(data, control);
    }

    /** \copydoc distributed_control::all_reduce() */
    template <typename U>
    inline void all_reduce(U& data, bool control = false) {
      rmi.all_reduce(data, control);
    }

    /// \copydoc distributed_control::all_reduce2()
    template <typename U, typename PlusEqual>
    void all_reduce2(U& data, PlusEqual plusequal, bool control = false) {
      rmi.all_reduce2(data, plusequal, control);
    }

    /// \copydoc distributed_control::barrier()
    inline void barrier() {
      rmi.barrier();
    }

    /// \copydoc distributed_control::full_barrier()
    inline void full_barrier() {
      rmi.full_barrier();
    }

  };

} // end of namespace graphlab
#include <graphlab/macros_undef.hpp>
#endif



================================================
FILE: src/graphlab/rpc/dc_stream_receive.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.  See the License for the specific language
 *  governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */
#include <iostream>
#include <algorithm>
#include <graphlab/rpc/dc.hpp>
#include <graphlab/rpc/dc_internal_types.hpp>
#include <graphlab/rpc/dc_stream_receive.hpp>
//#define DC_RECEIVE_DEBUG
namespace graphlab {
namespace dc_impl {

// Returns a pointer to the free region of the receive buffer; retbuflength
// is set to the number of bytes that can be written there.
char* dc_stream_receive::get_buffer(size_t& retbuflength) {
  retbuflength = write_buffer_len - write_buffer_written;
  return writebuffer + write_buffer_written;
}

// Called after 'wrotelength' bytes were written into the buffer returned by
// get_buffer().  Scans the accumulated bytes for complete packets, hands a
// prefix of complete packets off to dc, and returns a (possibly new) buffer
// for the next read.
char* dc_stream_receive::advance_buffer(char* c, size_t wrotelength,
                                        size_t& retbuflength) {
  // find the last complete message we have read
  write_buffer_written += wrotelength;
  if (write_buffer_written >= sizeof(packet_hdr)) {
    size_t offset = 0;
    packet_hdr* hdr = reinterpret_cast<packet_hdr*>(writebuffer);
    // keep pushing the header until I reach a point where there is
    // insufficient room to read a header, or the message is not large enough
    while(offset + sizeof(packet_hdr) <= write_buffer_written &&
          offset + hdr->len + sizeof(packet_hdr) <= write_buffer_written) {
      offset += hdr->len + sizeof(packet_hdr);
      hdr = reinterpret_cast<packet_hdr*>(writebuffer + offset);
    }
    if (offset > 0) {
      // ok. everything before the offset is good
      // since we are going to give this buffer away, we need to prepare a
      // new buffer; allocate whatever it is going to take to hold the next
      // message.
      // have we read the incomplete message's header?
      size_t incomplete_message_len = 0;
      if (offset + sizeof(packet_hdr) <= write_buffer_written) incomplete_message_len = hdr->len;
      size_t new_buflen = std::max<size_t>(sizeof(packet_hdr) + incomplete_message_len,
                                           RECEIVE_BUFFER_SIZE);
      // NOTE(review): malloc/realloc results are not NULL-checked anywhere
      // in this function; an allocation failure will crash -- verify this
      // is acceptable project-wide policy.
      char* new_writebuffer = (char*)malloc(new_buflen);
      if (write_buffer_len - offset > 0) {
        // copy over to the new buffer everything we will not use
        memcpy(new_writebuffer, writebuffer + offset, write_buffer_written - offset);
      }
      // if we reach here, we have an available block
      // give away the buffer to dc
      dc->deferred_function_call_chunk(writebuffer, offset, associated_proc);
      writebuffer = new_writebuffer;
      write_buffer_written -= offset;
      write_buffer_len = new_buflen;
    } else {
      // nothing ready yet
      // do we have enough room though?
      if (hdr->len + sizeof(packet_hdr) > write_buffer_len) {
        // grow the buffer in place until the entire packet can fit
        size_t newlen = hdr->len + sizeof(packet_hdr);
        writebuffer = (char*)realloc(writebuffer, newlen);
        write_buffer_len = newlen;
      }
    }
  }
  return get_buffer(retbuflength);
}

// No teardown work is required for the unbuffered stream receiver.
void dc_stream_receive::shutdown() { }

} // namespace dc_impl
} // namespace graphlab



================================================
FILE: src/graphlab/rpc/dc_stream_receive.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.
See the License for the specific language
 *  governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */
#ifndef DC_STREAM_RECEIVE_HPP
#define DC_STREAM_RECEIVE_HPP
#include <boost/type_traits/is_base_of.hpp>
#include <graphlab/rpc/circular_char_buffer.hpp>
#include <graphlab/rpc/dc_internal_types.hpp>
#include <graphlab/rpc/dc_types.hpp>
#include <graphlab/rpc/dc_compile_parameters.hpp>
#include <graphlab/rpc/dc_receive.hpp>
#include <graphlab/parallel/atomic.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/logger/logger.hpp>
namespace graphlab {
class distributed_control;

namespace dc_impl {

/**
 * \internal
 * \ingroup rpc
 * Receiver processor for the dc class.  The job of the receiver is to take
 * as input a byte stream (as received from the socket) and cut it up into
 * meaningful chunks.  This can be thought of as the receiving end of a
 * multiplexor.  This is the default unbuffered receiver.
 */
class dc_stream_receive: public dc_receive{
 public:
  dc_stream_receive(distributed_control* dc, procid_t associated_proc):
      writebuffer(NULL), write_buffer_written(0),
      dc(dc), associated_proc(associated_proc) {
    writebuffer = (char*)malloc(RECEIVE_BUFFER_SIZE);
    write_buffer_len = RECEIVE_BUFFER_SIZE;
  }

 private:
  char* writebuffer;            // current receive buffer
  size_t write_buffer_written;  // number of valid bytes in writebuffer
  size_t write_buffer_len;      // capacity of writebuffer
  /// pointer to the owner
  distributed_control* dc;
  procid_t associated_proc;     // machine this receiver is attached to

  void shutdown();

  char* get_buffer(size_t& retbuflength);

  char* advance_buffer(char* c, size_t wrotelength, size_t& retbuflength);
};

} // namespace dc_impl
} // namespace graphlab
#endif



================================================
FILE: src/graphlab/rpc/dc_tcp_comm.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
*
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.  See the License for the specific language
 *  governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>
#include <fcntl.h>
#include <signal.h>
#include <netinet/tcp.h>
#include <ifaddrs.h>
#include <poll.h>
#include <limits>
#include <vector>
#include <string>
#include <map>
#include <boost/lexical_cast.hpp>
#include <boost/bind.hpp>
#include <graphlab/logger/logger.hpp>
#include <graphlab/rpc/dc_tcp_comm.hpp>
#include <graphlab/rpc/dc_internal_types.hpp>
#include <graphlab/rpc/get_current_process_hash.cpp>
#define compile_barrier() asm volatile("": : :"memory")
#include <graphlab/macros_def.hpp>
// prefix mangling if not Mac
#ifndef __APPLE__
#include <graphlab/rpc/evwrapdef.h>
#endif
#include <event2/event.h>
#include <event2/thread.h>
//#define COMM_DEBUG
namespace graphlab {
namespace dc_impl {

// Establishes all pairwise TCP connections, then launches the libevent
// send/receive loops.  Blocks until the whole network is connected.
void dc_tcp_comm::init(const std::vector<std::string> &machines,
                       const std::map<std::string,std::string> &initopts,
                       procid_t curmachineid,
                       std::vector<dc_receive*> receiver_,
                       std::vector<dc_send*> sender_) {
  curid = curmachineid;
  ASSERT_LT(machines.size(), std::numeric_limits<procid_t>::max());
  nprocs = (procid_t)(machines.size());
  receiver = receiver_;
  sender = sender_;
  // insert machines into the address map
  all_addrs.resize(nprocs);
  portnums.resize(nprocs);
  assert(triggered_timeouts.size() >= nprocs);
  triggered_timeouts.clear();
  // fill all the socks
  sock.resize(nprocs);
  for (size_t i = 0;i < nprocs; ++i) {
    sock[i].id = i;
    sock[i].owner = this;
    sock[i].outsock = -1;
    sock[i].insock = -1;
    sock[i].inevent = NULL;
    sock[i].outevent = NULL;
    sock[i].wouldblock = false;
    sock[i].data.msg_name = NULL;
    sock[i].data.msg_namelen = 0;
    sock[i].data.msg_control = NULL;
    sock[i].data.msg_controllen = 0;
    sock[i].data.msg_flags = 0;
    sock[i].data.msg_iovlen = 0;
    sock[i].data.msg_iov = NULL;
  }
  program_md5 = get_current_process_hash();
  ASSERT_EQ(program_md5.length(), 32);
  // parse the machines list, and extract the relevant address information
  for (size_t i = 0;i < machines.size(); ++i) {
    // extract the port number
    size_t pos = machines[i].find(":");
    ASSERT_NE(pos, std::string::npos);
    std::string address = machines[i].substr(0, pos);
    size_t port = boost::lexical_cast<size_t>(machines[i].substr(pos+1));
    struct hostent* ent = gethostbyname(address.c_str());
    ASSERT_EQ(ent->h_length, 4);
    uint32_t addr = *reinterpret_cast<uint32_t*>(ent->h_addr_list[0]);
    all_addrs[i] = addr;
    ASSERT_LT(port, 65536);
    portnums[i] = (uint16_t)(port);
  }
  network_bytessent = 0;
  buffered_len = 0;
  // if sock handle is set
  std::map<std::string, std::string>::const_iterator iter =
      initopts.find("__sockhandle__");
  if (iter != initopts.end()) {
    open_listening(atoi(iter->second.c_str()));
  } else {
    open_listening();
  }
  // to improve the "synchronous" nature of the connection setup,
  // the last machine will do this in reverse order:
  // it waits for all machines to connect to it before it
  // tries to connect to all machines.
  // The last machine therefore essentially acts as the "barrier" leader
  if (curid != nprocs - 1) {
    // not the last machine.
    // Connect to everyone, EXCEPT the last machine
    // and wait for all incoming connections
    for(procid_t i = 0;i < nprocs - 1; ++i) connect(i);
    // wait for p - 1 incoming connections
    insock_lock.lock();
    while(1) {
      if (num_in_connected() == sock.size() - 1) break;
      insock_cond.wait(insock_lock);
    }
    insock_lock.unlock();
    // connect to the last machine
    connect(nprocs - 1);
    // wait for the last connection
    insock_lock.lock();
    while(1) {
      if (num_in_connected() == sock.size()) break;
      insock_cond.wait(insock_lock);
    }
    insock_lock.unlock();
  } else {
    // I am the last machine
    ASSERT_EQ(curid, nprocs - 1);
    // wait for all incoming connections before connecting to everyone
    // connect to myself
    connect(nprocs - 1);
    insock_lock.lock();
    while(1) {
      if (num_in_connected() == sock.size()) break;
      insock_cond.wait(insock_lock);
    }
    insock_lock.unlock();
    // now, when I know that machines 0 to #procs - 2 have
    // all established a connection to each other,
    // connect to everyone.  This is essentially equivalent to the
    // barrier release message
    for(size_t i = 0;i < nprocs; ++i) connect(i);
  }
  // everyone is connected.
  // Construct the eventbase
  construct_events();
  // we reserve the last 2 cores for communication
  inthreads.launch(boost::bind(&dc_tcp_comm::receive_loop, this, inevbase),
                   thread::cpu_count() - 2);
  outthreads.launch(boost::bind(&dc_tcp_comm::send_loop, this, outevbase),
                    thread::cpu_count() - 1);
  is_closed = false;
}

// Builds both libevent bases (in/out), the per-socket read/write events and
// the two timer events used to trigger sends.
void dc_tcp_comm::construct_events() {
  int ret = evthread_use_pthreads();
  if (ret < 0) logstream(LOG_FATAL) << "Unable to initialize libevent with pthread support!" << std::endl;
  // number of evs to create.
  outevbase = event_base_new();
  if (!outevbase) logstream(LOG_FATAL) << "Unable to construct libevent base" << std::endl;
  send_all_timeout.owner = this;
  send_all_timeout.send_all = true;
  send_triggered_timeout.owner = this;
  send_triggered_timeout.send_all = false;
  // periodic "send everything" timer
  send_all_event = event_new(outevbase, -1, EV_TIMEOUT | EV_PERSIST,
                             on_send_event, &(send_all_timeout));
  assert(send_all_event != NULL);
  struct timeval t = {SEND_POLL_TIMEOUT / 1000000, SEND_POLL_TIMEOUT % 1000000} ;
  event_add(send_all_event, &t);
  // manually-activated timer servicing only sockets flagged in triggered_timeouts
  send_triggered_event = event_new(outevbase, -1, EV_TIMEOUT | EV_PERSIST,
                                   on_send_event, &(send_triggered_timeout));
  assert(send_triggered_event != NULL);
  inevbase = event_base_new();
  if (!inevbase) logstream(LOG_FATAL) << "Unable to construct libevent base" << std::endl;
  //register all event objects
  for (size_t i = 0;i < sock.size(); ++i) {
    sock[i].inevent = event_new(inevbase, sock[i].insock,
                                EV_READ | EV_PERSIST | EV_ET,
                                on_receive_event, &(sock[i]));
    if (sock[i].inevent == NULL) {
      logstream(LOG_FATAL) << "Unable to register socket read event" << std::endl;
    }
    sock[i].outevent = event_new(outevbase, sock[i].outsock,
                                 EV_WRITE | EV_PERSIST | EV_ET,
                                 on_send_event, &(sock[i]));
    if (sock[i].outevent == NULL) {
      logstream(LOG_FATAL) << "Unable to register socket write event" << std::endl;
    }
    event_add(sock[i].inevent, NULL);
    //struct timeval t = {0, 10};
    event_add(sock[i].outevent, NULL);
  }
}

// Counts how many incoming connections have been accepted so far.
// Caller must hold insock_lock for a consistent reading during init.
size_t dc_tcp_comm::num_in_connected() const {
  size_t connected = 0;
  for (size_t i = 0;i < sock.size(); ++i) {
    connected += (sock[i].insock != -1);
  }
  return connected;
}

// Requests that 'target' be serviced by the send loop.  Non-urgent requests
// set a bit and poke the triggered timer; urgent requests service the
// socket inline on the calling thread.
void dc_tcp_comm::trigger_send_timeout(procid_t target, bool urgent) {
  if (!urgent) {
    if (sock[target].wouldblock == false &&
        triggered_timeouts.get(target) == false) {
      triggered_timeouts.set_bit(target);
      event_active(send_triggered_event, EV_TIMEOUT, 1);
    }
  } else {
    process_sock(&(sock[target]));
  }
}

// Tears down the listener, both event loops and all sockets.  Idempotent.
void dc_tcp_comm::close() {
  if (is_closed) return;
  logstream(LOG_INFO) << "Closing listening socket" << std::endl;
  // close the listening socket
  if (listensock > 0) {
    ::close(listensock);
    listensock = -1;
  }
  // shutdown the listening thread
  listenthread.join();
  // clear the outevent loop
  event_base_loopbreak(outevbase);
  outthreads.join();
  for (size_t i = 0;i < sock.size(); ++i) {
    event_free(sock[i].outevent);
  }
  event_free(send_triggered_event);
  event_free(send_all_event);
  event_base_free(outevbase);
  logstream(LOG_INFO) << "Closing outgoing sockets" << std::endl;
  // close all outgoing sockets
  for (size_t i = 0;i < sock.size(); ++i) {
    if (sock[i].outsock > 0) {
      ::close(sock[i].outsock);
      sock[i].outsock = -1;
    }
  }
  // clear the inevent loop
  event_base_loopbreak(inevbase);
  inthreads.join();
  for (size_t i = 0;i < sock.size(); ++i) {
    event_free(sock[i].inevent);
  }
  event_base_free(inevbase);
  logstream(LOG_INFO) << "Closing incoming sockets" << std::endl;
  // close all incoming sockets
  for (size_t i = 0;i < sock.size(); ++i) {
    if (sock[i].insock > 0) {
      ::close(sock[i].insock);
      sock[i].insock = -1;
    }
  }
  is_closed = true;
}

// Drains sockinfo.outvec via sendmsg() until everything is sent (returns
// true) or the socket would block (sets wouldblock, returns false).
bool dc_tcp_comm::send_till_block(socket_info& sockinfo) {
  sockinfo.wouldblock = false;
  // while there is still data to be sent
  BEGIN_TRACEPOINT(tcp_send_call);
  while(!sockinfo.outvec.empty()) {
    sockinfo.outvec.fill_msghdr(sockinfo.data);
    ssize_t ret = sendmsg(sockinfo.outsock, &sockinfo.data, 0);
    if (ret < 0) {
      END_TRACEPOINT(tcp_send_call);
      if (errno == EWOULDBLOCK || errno == EAGAIN) {
        sockinfo.wouldblock = true;
        return false;
      } else {
        logstream(LOG_FATAL) << "send error: " << strerror(errno) << std::endl;
        return false;
      }
    }
#ifdef COMM_DEBUG
    logstream(LOG_INFO) << ret << " bytes --> " << sockinfo.id << std::endl;
#endif
    network_bytessent.inc(ret);
    sockinfo.outvec.sent(ret);
  }
  END_TRACEPOINT(tcp_send_call);
  return true;
}

// Blocking send of exactly len bytes; returns 0 on success or errno.
// Used only for the initial handshake, before the socket goes non-blocking.
int dc_tcp_comm::sendtosock(int sockfd, const char* buf, size_t len) {
  size_t numsent = 0;
  BEGIN_TRACEPOINT(tcp_send_call);
  while (numsent < len) {
    ssize_t ret = ::send(sockfd, buf + numsent, len - numsent, 0);
    if (ret < 0) {
      logstream(LOG_ERROR) << "send error: " << strerror(errno) << std::endl;
      END_TRACEPOINT(tcp_send_call);
      return errno;
    }
    numsent += ret;
  }
  END_TRACEPOINT(tcp_send_call);
  return 0;
}

// Disables Nagle's algorithm on fd; only warns on failure.
void dc_tcp_comm::set_tcp_no_delay(int fd) {
  int flag = 1;
  int result = setsockopt(fd,            /* socket affected */
                          IPPROTO_TCP,   /* set option at TCP level */
                          TCP_NODELAY,   /* name of option */
                          (char *) &flag,
                          sizeof(int));
  if (result < 0) {
    logstream(LOG_WARNING) << "Unable to disable Nagle. Performance may be signifantly reduced" << std::endl;
  }
  // set nonblocking
}

// Switches fd to O_NONBLOCK; fatal on failure.
void dc_tcp_comm::set_non_blocking(int fd) {
  int flag = fcntl(fd, F_GETFL);
  if (flag < 0) {
    logstream(LOG_FATAL) << "Unable to get socket flags" << std::endl;
  }
  flag |= O_NONBLOCK;
  if (fcntl(fd, F_SETFL, flag) < 0) {
    logstream(LOG_FATAL) << "Unable to set socket as non-blocking" << std::endl;
  }
}

// Records a freshly accepted incoming socket for machine 'id' and wakes
// any thread waiting on insock_cond (see init()).
void dc_tcp_comm::new_socket(int newsock, sockaddr_in* otheraddr, procid_t id) {
  // figure out the address of the incoming connection
  uint32_t addr = *reinterpret_cast<uint32_t*>(&(otheraddr->sin_addr));
  // locate the incoming address in the list
  logstream(LOG_INFO) << "Incoming connection from "
                      << inet_ntoa(otheraddr->sin_addr) << std::endl;
  ASSERT_LT(id, all_addrs.size());
  ASSERT_EQ(all_addrs[id], addr);
  insock_lock.lock();
  ASSERT_EQ(sock[id].insock, -1);
  sock[id].insock = newsock;
  insock_cond.signal();
  insock_lock.unlock();
  logstream(LOG_INFO) << "Proc " << procid() << " accepted connection "
                      << "from machine " << id << std::endl;
}

// Opens (or adopts) the listening socket and spawns the accept thread.
void dc_tcp_comm::open_listening(int sockhandle) {
  // open listening socket
  if (sockhandle == 0) {
    listensock = socket(AF_INET, SOCK_STREAM, 0);
    // uninteresting boiler plate.  Set the port number and socket type
    sockaddr_in my_addr;
    my_addr.sin_family = AF_INET;
    my_addr.sin_port = htons(portnums[curid]);
    my_addr.sin_addr.s_addr = INADDR_ANY;
    memset(&(my_addr.sin_zero), '\0', 8);
    logstream(LOG_INFO) << "Proc " << procid() << " Bind on "
                        << portnums[curid] << "\n";
    if (bind(listensock, (sockaddr*)&my_addr, sizeof(my_addr)) < 0) {
      logstream(LOG_FATAL) << "bind: " << strerror(errno) << "\n";
      ASSERT_TRUE(0);
    }
  } else {
    listensock = sockhandle;
  }
  logstream(LOG_INFO) << "Proc " << procid() << " listening on "
                      << portnums[curid] << "\n";
  ASSERT_EQ(0, listen(listensock, 128));
  // spawn a thread which loops around accept
  listenthread.launch(boost::bind(&dc_tcp_comm::accept_handler, this));
} // end of open_listening

// Establishes the outgoing connection to 'target' (no-op if already
// connected), retrying up to 10 times, then sends the handshake message.
void dc_tcp_comm::connect(size_t target) {
  if (sock[target].outsock != -1) {
    return;
  } else {
    int newsock = socket(AF_INET, SOCK_STREAM, 0);
    set_tcp_no_delay(newsock);
    sockaddr_in serv_addr;
    serv_addr.sin_family = AF_INET;
    // set the target port
    serv_addr.sin_port = htons(portnums[target]);
    // set the target address
    serv_addr.sin_addr = *(struct in_addr*)&(all_addrs[target]);
    memset(&(serv_addr.sin_zero), '\0', 8);
    // Connect!
    logstream(LOG_INFO) << "Trying to connect from " << curid << " -> " << target
                        << " on port " << portnums[target] << "\n";
    logger(LOG_INFO, "Destination IP = %s", inet_ntoa(serv_addr.sin_addr));
    // retry 10 times at 1 second intervals
    bool success = false;
    for (size_t i = 0;i < 10; ++i) {
      if (::connect(newsock, (sockaddr*)&serv_addr, sizeof(serv_addr)) < 0) {
        logstream(LOG_INFO) << "connect " << curid << " to " << target << ": "
                            << strerror(errno) << ". Retrying...\n";
        timer::sleep(1);
        // posix says that
        /*
          If connect() fails, the state of the socket is unspecified.
          Conforming applications should close the file descriptor and
          create a new socket before attempting to reconnect.
        */
        ::close(newsock);
        newsock = socket(AF_INET, SOCK_STREAM, 0);
        set_tcp_no_delay(newsock);
      } else {
        // send the initial message
        initial_message msg;
        msg.id = curid;
        memcpy(msg.md5, program_md5.c_str(), 32);
        sendtosock(newsock, reinterpret_cast<char*>(&msg), sizeof(initial_message));
        set_non_blocking(newsock);
        success = true;
        break;
      }
    }
    if (!success) {
      logstream(LOG_FATAL) << "Failed to establish connection" << std::endl;
    }
    // remember the socket
    sock[target].outsock = newsock;
    logstream(LOG_INFO) << "connection from " << curid << " to " << target
                        << " established." << std::endl;
  }
} // end of connect

////////////////////////////////////////////////////////////////////////////
//               These stuff run in separate threads                      //
////////////////////////////////////////////////////////////////////////////

// waits for incoming connections
void dc_tcp_comm::accept_handler() {
  pollfd pf;
  pf.fd = listensock;
  pf.events = POLLIN;
  pf.revents = 0;
  size_t numsocks_connected = 0;
  logstream(LOG_INFO) << "Listening thread launched." << std::endl;
  while(numsocks_connected < sock.size()) {
    // wait for incoming event
    poll(&pf, 1, 1000);
    // if we have a POLLIN, we have an incoming socket request
    if (pf.revents & POLLIN) {
      logstream(LOG_INFO) << "Accepting...." << std::endl;
      // accept the socket
      sockaddr_in their_addr;
      socklen_t namelen = sizeof(sockaddr_in);
      int newsock = accept(listensock, (sockaddr*)&their_addr, &namelen);
      logstream(LOG_INFO) << "Accepted" << std::endl;
      if (newsock < 0) {
        break;
      }
      // set the socket options and inform the
      set_tcp_no_delay(newsock);
      // before accepting the socket, get the machine number
      initial_message remote_message;
      ssize_t msglen = 0;
      while(msglen != sizeof(initial_message)) {
        int retval = recv(newsock, (char*)(&remote_message) + msglen,
                          sizeof(initial_message) - msglen, 0);
        if (retval < 0) {
          if (errno == EWOULDBLOCK || errno == EAGAIN) {
            continue;
          } else {
            logstream(LOG_FATAL) << "error: " << errno
                                 << " receive error: " << strerror(errno) << std::endl;
          }
        } else if (retval > 0) {
          msglen += retval;
        } else if (retval == 0) {
          std::cout << "error: connection dropped." << std::endl;
          ::close(newsock);
          newsock = -1;
          break;
        }
      }
      if (newsock != -1) {
        // validate the md5 hash
        std::string other_md5 = std::string(remote_message.md5, 32);
        if (other_md5 != program_md5) {
          logstream(LOG_FATAL) << "MD5 mismatch. \n "
                               << "\tProcess " << curid << " has hash " << program_md5 << " \n "
                               << "\tProcess " << remote_message.id << " has hash " << other_md5 << " \n "
                               << "\tGraphLab requires all machines to run exactly the same binary."
                               << std::endl;
        }
        // register the new socket
        set_non_blocking(newsock);
        new_socket(newsock, &their_addr, remote_message.id);
        ++numsocks_connected;
      }
    }
    if (listensock == -1) {
      // the owner has closed
      break;
    }
  }
  logstream(LOG_INFO) << "Listening thread quitting" << std::endl;
} // end of run

// libevent receive handler: drains the socket into the receiver's buffer
// until recv() would block.
void on_receive_event(int fd, short ev, void* arg) {
  dc_tcp_comm::socket_info* sockinfo = (dc_tcp_comm::socket_info*)(arg);
  dc_tcp_comm* comm = sockinfo->owner;
  if (ev & EV_READ) {
    // get a direct pointer to my receiver
    dc_receive* receiver = comm->receiver[sockinfo->id];
    size_t buflength;
    char *c = receiver->get_buffer(buflength);
    while(1) {
      ssize_t msglen = recv(fd, c, buflength, 0);
      if (msglen < 0) {
        if (errno == EAGAIN || errno == EWOULDBLOCK) break;
        else {
          logstream(LOG_FATAL) << "receive error: " << strerror(errno) << std::endl;
          break;
        }
      } else if (msglen == 0) {
        // socket closed
        break;
      } else if (msglen > 0) {
        comm->network_bytesreceived.inc(msglen);
#ifdef COMM_DEBUG
        logstream(LOG_INFO) << msglen << " bytes <-- " << sockinfo->id << std::endl;
#endif
        c = receiver->advance_buffer(c, msglen, buflength);
      }
    }
  }
}

// Body of the receive thread: runs the incoming libevent dispatch loop.
void dc_tcp_comm::receive_loop(struct event_base* ev) {
  logstream(LOG_INFO) << "Receive loop Started" << std::endl;
  int ret = event_base_dispatch(ev);
  if (ret != 0) {
    logstream(LOG_FATAL) << "Receive loop Quit with " << ret << std::endl;
  } else {
    logstream(LOG_INFO) << "Receive loop Stopped" << std::endl;
  }
}

// Pulls newly queued outgoing data from the sender into sockinfo.outvec.
void dc_tcp_comm::check_for_new_data(dc_tcp_comm::socket_info& sockinfo) {
  buffered_len.inc(sender[sockinfo.id]->get_outgoing_data(sockinfo.outvec));
}

// Services one socket: pulls pending data and sends until it would block.
// try_lock means a socket already being serviced is simply skipped.
inline void process_sock(dc_tcp_comm::socket_info* sockinfo) {
  if (sockinfo->m.try_lock()) {
    dc_tcp_comm* comm = sockinfo->owner;
    if (sockinfo->wouldblock == false) {
      comm->check_for_new_data(*sockinfo);
      if (!sockinfo->outvec.empty()) {
        comm->send_till_block(*sockinfo);
      }
    }
    sockinfo->m.unlock();
  }
}

// libevent send handler: EV_WRITE means the socket became writable again;
// EV_TIMEOUT services either the flagged sockets or all of them.
void on_send_event(int fd, short ev, void* arg) {
  if (ev & EV_WRITE) {
    dc_tcp_comm::socket_info* sockinfo = (dc_tcp_comm::socket_info*)(arg);
    sockinfo->wouldblock = false;
    process_sock(sockinfo);
  } else if (ev & EV_TIMEOUT) {
    dc_tcp_comm::timeout_event* te = (dc_tcp_comm::timeout_event*)(arg);
    dc_tcp_comm* comm = te->owner;
    if (te->send_all == false) {
      // this is a triggered event
      foreach(uint32_t i, comm->triggered_timeouts) {
        comm->triggered_timeouts.clear_bit(i);
        dc_tcp_comm::socket_info* sockinfo = &(comm->sock[i]);
        process_sock(sockinfo);
      }
    } else {
      // send all event
      for(uint32_t i = 0;i < comm->sock.size(); ++i) {
        dc_tcp_comm::socket_info* sockinfo = &(comm->sock[i]);
        process_sock(sockinfo);
      }
    }
  }
}

// Body of the send thread: runs the outgoing libevent dispatch loop.
void dc_tcp_comm::send_loop(struct event_base* ev) {
  logstream(LOG_INFO) << "Send loop Started" << std::endl;
  int ret = event_base_dispatch(ev);
  if (ret != 0) {
    logstream(LOG_FATAL) << "Send loop Quit with " << ret << std::endl;
  } else {
    logstream(LOG_INFO) << "Send loop Stopped" << std::endl;
  }
}

}; // end of namespace dc_impl
}; // end of namespace graphlab



================================================
FILE: src/graphlab/rpc/dc_tcp_comm.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.  See the License for the specific language
 *  governing permissions and limitations under the License.
*
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */
#ifndef DC_TCP_COMM_HPP
#define DC_TCP_COMM_HPP
#include <sys/socket.h>
#include <netinet/in.h>
#include <vector>
#include <string>
#include <map>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/rpc/dc_types.hpp>
#include <graphlab/rpc/dc_internal_types.hpp>
#include <graphlab/rpc/dc_comm_base.hpp>
#include <graphlab/rpc/circular_iovec_buffer.hpp>
#include <graphlab/util/tracepoint.hpp>
#include <graphlab/util/dense_bitset.hpp>
#ifndef __APPLE__
// prefix mangling if not Mac
#include <graphlab/rpc/evwrapdef.h>
#endif
#include <event2/event.h>
namespace graphlab {
namespace dc_impl {

// libevent callbacks (defined in dc_tcp_comm.cpp)
void on_receive_event(int fd, short ev, void* arg);
void on_send_event(int fd, short ev, void* arg);

/**
 \ingroup rpc
 \internal
 TCP implementation of the communications subsystem.
 Provides a single object interface to sending/receiving data streams to
 a collection of machines.
*/
class dc_tcp_comm:public dc_comm_base {
 public:
  DECLARE_TRACER(tcp_send_call);

  inline dc_tcp_comm() {
    is_closed = true;
    INITIALIZE_TRACER(tcp_send_call, "dc_tcp_comm: send syscall");
  }

  size_t capabilities() const {
    return COMM_STREAM;
  }

  /**
   this fuction should pause until all communication has been set up and
   returns the number of systems in the network.  After which, all other
   remaining public functions (numprocs(), send(), etc) should operate
   normally.  Every received message should immediately trigger the
   attached receiver.

   machines: a vector of strings where each string is of the form
             [IP]:[portnumber]
   initopts: unused
   curmachineid: The ID of the current machine.  machines[curmachineid]
                 will be the listening address of this machine
   recvcallback: A function pointer to the receiving function.  This
                 function must be thread-safe
   tag: An additional pointer passed to the receiving function.
  */
  void init(const std::vector<std::string> &machines,
            const std::map<std::string,std::string> &initopts,
            procid_t curmachineid,
            std::vector<dc_receive*> receiver,
            std::vector<dc_send*> senders);

  /** shuts down all sockets and cleans up */
  void close();

  ~dc_tcp_comm() {
    close();
  }

  /// True if an outgoing connection to 'target' has been established.
  inline bool channel_active(size_t target) const {
    return (sock[target].outsock != -1);
  }

  /**
   Returns the number of machines in the network.
   Only valid after call to init()
  */
  inline procid_t numprocs() const {
    return nprocs;
  }

  /**
   * Returns the current machine ID.
   * Only valid after call to init()
   */
  inline procid_t procid() const {
    return curid;
  }

  /**
   * Returns the total number of bytes sent
   */
  inline size_t network_bytes_sent() const {
    return network_bytessent.value;
  }

  /**
   * Returns the total number of bytes received
   */
  inline size_t network_bytes_received() const {
    return network_bytesreceived.value;
  }

  /// Bytes buffered for sending but not yet put on the wire.
  inline size_t send_queue_length() const {
    size_t a = network_bytessent.value;
    size_t b = buffered_len.value;
    return b - a;
  }

  /**
   Sends the string of length len to the target machine dest.
   Only valid after call to init();
   Establishes a connection if necessary
  */
  void send(size_t target, const char* buf, size_t len);

  void trigger_send_timeout(procid_t target, bool urgent);

 private:
  /// Sets TCP_NO_DELAY on the socket passed in fd
  void set_tcp_no_delay(int fd);

  void set_non_blocking(int fd);

  /// called when listener receives an incoming socket request
  void new_socket(int newsock, sockaddr_in* otheraddr, procid_t remotemachineid);

  /// The number of incoming connections established
  size_t num_in_connected() const;

  /** opens the listening sock and spawns a thread to listen on it.
   *  Uses sockhandle if non-zero
   */
  void open_listening(int sockhandle = 0);

  /// constructs a connection to the target machine
  void connect(size_t target);

  /// wrapper around the standard send. but loops till the buffer is all sent
  int sendtosock(int sockfd, const char* buf, size_t len);

  procid_t curid;   /// id of the current processor
  procid_t nprocs;  /// number of processors
  bool is_closed;   /// whether this socket is closed
  std::string program_md5;  /// MD5 hash of current program

  /// all_addrs[i] will contain the IP address of machine i
  std::vector<uint32_t> all_addrs;
  std::map<uint32_t, procid_t> addr2id;
  std::vector<uint16_t> portnums;

  std::vector<dc_receive*> receiver;
  std::vector<dc_send*> sender;

  atomic<size_t> buffered_len;

  // handshake message exchanged when a connection is established
  struct initial_message {
    procid_t id;
    char md5[32];
  };

  /// All information about stuff regarding a particular sock
  /// Passed to the receive handler
  struct socket_info{
    size_t id;               /// which machine this is connected to
    dc_tcp_comm* owner;      /// this object
    int outsock;             /// FD of the outgoing socket
    int insock;              /// FD of the incoming socket
    struct event* inevent;   /// event object for incoming information
    struct event* outevent;  /// event object for outgoing information
    bool wouldblock;
    mutex m;
    circular_iovec_buffer outvec;  /// outgoing data
    struct msghdr data;
  };

  mutex insock_lock;        /// locks the insock field in socket_info
  conditional insock_cond;  /// triggered when the insock field in socket_info changes

  struct timeout_event {
    bool send_all;
    dc_tcp_comm* owner;
  };

  std::vector<socket_info> sock;

  /**
   * Sends as much of the buffer inside the sockinfo as possible
   * until the send call will block or all sends are complete.
   * Returns true when the buffer has been completely sent
   * If wouldblock returns true, the next call to send_till_block may block
   */
  void send_all(socket_info& sockinfo);
  bool send_till_block(socket_info& sockinfo);

  void check_for_new_data(socket_info& sockinfo);

  void construct_events();

  // counters
  atomic<size_t> network_bytessent;
  atomic<size_t> network_bytesreceived;

  //////////// Receiving Sockets //////////////////////
  thread_group inthreads;
  void receive_loop(struct event_base*);
  friend void process_sock(socket_info* sockinfo);
  friend void on_receive_event(int fd, short ev, void* arg);
  struct event_base* inevbase;

  //////////// Sending Sockets //////////////////////
  thread_group outthreads;
  void send_loop(struct event_base*);
  friend void on_send_event(int fd, short ev, void* arg);
  struct event_base* outevbase;
  struct event* send_triggered_event;
  struct event* send_all_event;
  timeout_event send_triggered_timeout;
  timeout_event send_all_timeout;
  fixed_dense_bitset<256> triggered_timeouts;

  //////////// Listening Sockets //////////////////////
  int listensock;
  thread listenthread;
  void accept_handler();
};

void process_sock(dc_tcp_comm::socket_info* sockinfo);

} // namespace dc_impl
} // namespace graphlab
#ifndef __APPLE__
// prefix mangling if not Mac
#include <graphlab/rpc/evwrapundef.h>
#endif
#endif



================================================
FILE: src/graphlab/rpc/dc_thread_get_send_buffer.hpp
================================================
#ifndef GRAPHLAB_RPC_DC_DEPENDENCY_SPLIT_HPP
#define GRAPHLAB_RPC_DC_DEPENDENCY_SPLIT_HPP
/*
 * This implements a bunch of internal functions which should really reside
 * as static functions in distributed_control.
But * */ #include <graphlab/serialization/oarchive.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/rpc/thread_local_send_buffer.hpp> namespace graphlab { namespace dc_impl { extern pthread_key_t thrlocal_send_buffer_key; extern pthread_key_t thrlocal_sequentialization_key; /** * \internal * Obtains the thread local send buffer for a given target */ inline oarchive* get_thread_local_buffer(procid_t target) { void* ptr = pthread_getspecific(thrlocal_send_buffer_key); thread_local_buffer* p = (thread_local_buffer*)(ptr); if (p == NULL) { p = new thread_local_buffer; pthread_setspecific(thrlocal_send_buffer_key, (void*)p); } return p->acquire(target); } /** * \internal * Releases the thread local send buffer for the given target */ inline void release_thread_local_buffer(procid_t target, bool do_not_count_bytes_sent) { void* ptr = pthread_getspecific(thrlocal_send_buffer_key); thread_local_buffer* p = (thread_local_buffer*)(ptr); p->release(target, do_not_count_bytes_sent); } /** * \internal * Writes a sequence of bytes to the local send buffer */ inline void write_thread_local_buffer(procid_t target, char* c, size_t len, bool do_not_count_bytes_sent) { void* ptr = pthread_getspecific(thrlocal_send_buffer_key); thread_local_buffer* p = (thread_local_buffer*)(ptr); p->write(target, c, len, do_not_count_bytes_sent); } /** * \internal */ inline void push_flush_thread_local_buffer() { void* ptr = pthread_getspecific(thrlocal_send_buffer_key); thread_local_buffer* p = (thread_local_buffer*)(ptr); if (p) p->push_flush(); } /** * \internal */ inline void pull_flush_thread_local_buffer(procid_t proc) { void* ptr = pthread_getspecific(thrlocal_send_buffer_key); thread_local_buffer* p = (thread_local_buffer*)(ptr); if (p) p->pull_flush(proc); } /** * \internal */ inline void pull_flush_soon_thread_local_buffer(procid_t proc) { void* ptr = pthread_getspecific(thrlocal_send_buffer_key); thread_local_buffer* p = (thread_local_buffer*)(ptr); if (p) 
p->pull_flush_soon(proc); } /** * \internal */ inline void pull_flush_soon_thread_local_buffer() { void* ptr = pthread_getspecific(thrlocal_send_buffer_key); thread_local_buffer* p = (thread_local_buffer*)(ptr); if (p) p->pull_flush_soon(); } /** * Gets the current procid. * This function really exists to split the dependency between this header and * dc.hpp */ inline procid_t _get_procid() { void* ptr = pthread_getspecific(thrlocal_send_buffer_key); thread_local_buffer* p = (thread_local_buffer*)(ptr); if (p == NULL) { p = new thread_local_buffer; pthread_setspecific(thrlocal_send_buffer_key, (void*)p); } return p->procid; } /** * Get the current sequentialization key. * This function really exists to split the dependency between this header and * dc.hpp */ inline procid_t _get_sequentialization_key() { size_t oldval = reinterpret_cast<size_t>(pthread_getspecific(dc_impl::thrlocal_sequentialization_key)); return (unsigned char)oldval; } } } #endif ================================================ FILE: src/graphlab/rpc/dc_types.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef DISTRIBUTED_CONTROL_TYPES_HPP
#define DISTRIBUTED_CONTROL_TYPES_HPP
#include <inttypes.h>
#include <graphlab/serialization/iarchive.hpp>
namespace graphlab {
  /// The type used for numbering processors \ingroup rpc
  /// (16-bit, so at most 65535 distinct machine IDs)
  typedef uint16_t procid_t;

  /**
   * \internal
   * \ingroup rpc
   * The underlying communication protocol
   */
  enum dc_comm_type {
    TCP_COMM,  ///< TCP/IP
    SCTP_COMM  ///< SCTP (limited support)
  };

  /**
   * \internal
   * \ingroup rpc
   * A pointer that points directly into
   * the middle of a deserialized buffer.
   *
   * load() sets ptr to the current read position (buf + off) of the
   * archive. The pointer is non-owning: it presumably remains valid
   * only while the archive's underlying buffer is alive — verify at
   * call sites before retaining it.
   */
  struct wild_pointer {
    const void* ptr;
    void load(iarchive& iarc) {
      // the archive must be backed by an in-memory buffer
      assert(iarc.buf != NULL);
      ptr = reinterpret_cast<const void*>(iarc.buf + iarc.off);
    }
  };
};
#include <graphlab/rpc/dc_packet_mask.hpp>
#endif


================================================
FILE: src/graphlab/rpc/delta_dht.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.  See the License for the specific language
 *  governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

/**
 * Also contains code that is Copyright 2011 Yahoo! Inc.  All rights
 * reserved.
* * Contributed under the iCLA for: * Joseph Gonzalez (jegonzal@yahoo-inc.com) * */ #include <boost/unordered_map.hpp> #include <graphlab/rpc/delta_dht.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { namespace delta_dht_impl { typedef boost::unordered_map<const void*, icache*> cache_map_type; void destroy_tls_data(void* ptr) { cache_map_type* cache_map_ptr = static_cast<cache_map_type*>(ptr); if(cache_map_ptr != NULL) { cache_map_type& cache_map = *cache_map_ptr; typedef cache_map_type::value_type pair_type; foreach(pair_type& pair, cache_map) { if(pair.second != NULL) { delete pair.second; pair.second = NULL; } } delete cache_map_ptr; } } struct tls_key_creator { pthread_key_t TLS_KEY; tls_key_creator() : TLS_KEY(0) { pthread_key_create(&TLS_KEY, destroy_tls_data); } }; const tls_key_creator key; icache*& get_icache_ptr(const void* dht_ptr) { cache_map_type* cache_map_ptr = static_cast<cache_map_type*> (pthread_getspecific(key.TLS_KEY)); if(cache_map_ptr == NULL) { cache_map_ptr = new cache_map_type(); pthread_setspecific(key.TLS_KEY, cache_map_ptr); } ASSERT_NE(cache_map_ptr, NULL); ASSERT_NE(dht_ptr, NULL); return (*cache_map_ptr)[dht_ptr]; } }; // end of delta dht impl }; // end of graphlab namespace ================================================ FILE: src/graphlab/rpc/delta_dht.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * Also contains code that is Copyright 2011 Yahoo! Inc. All rights * reserved. * * Contributed under the iCLA for: * Joseph Gonzalez (jegonzal@yahoo-inc.com) * */ #ifndef GRAPHLAB_DELTA_DHT_HPP #define GRAPHLAB_DELTA_DHT_HPP #include <boost/unordered_map.hpp> #include <boost/functional/hash.hpp> #include <graphlab/rpc/dc.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/util/cache.hpp> #include <graphlab/macros_def.hpp> namespace graphlab { namespace delta_dht_impl { struct icache { virtual ~icache() { } }; icache*& get_icache_ptr(const void* dht_ptr); }; // end of namespace delta_dht_impl namespace delta_predicate { template<typename ValueType, typename DeltaType> struct uses { size_t max_uses; uses(size_t max_uses = 100) : max_uses(max_uses) { } //! returns true if the predicate bool operator()(const ValueType& current, const DeltaType& delta, const size_t& uses) const { return uses < max_uses; } }; // end of uses }; // end of eviction predicates template<typename KeyType, typename ValueType, typename DeltaType = ValueType> class delta_dht { public: typedef KeyType key_type; typedef ValueType value_type; typedef DeltaType delta_type; typedef size_t size_type; typedef boost::unordered_map<key_type, value_type> data_map_type; struct cache_entry { value_type value; delta_type delta; size_t uses; cache_entry(const value_type& value = value_type()) : value(value), uses(0) { } }; typedef cache::lru<key_type, cache_entry> cache_type; private: //! The remote procedure call manager mutable dc_dist_object<delta_dht> rpc; //! The data stored locally on this machine data_map_type data_map; //! The lock for the data map mutex data_lock; //! The master cache cache_type cache; //! The master cash rw lock mutex cache_lock; //! The maximum cache size size_t max_cache_size; size_t max_uses; //! the hash function boost::hash<key_type> hash_function; //! 
cache hits and misses mutable atomic<size_t> local; mutable atomic<size_t> hits; mutable atomic<size_t> misses; mutable atomic<size_t> background_updates; public: delta_dht(distributed_control& dc, size_t max_cache_size = 2056) : rpc(dc, this), max_cache_size(max_cache_size), max_uses(10) { rpc.barrier(); } ~delta_dht() { rpc.full_barrier(); } void set_max_uses(size_t max) { max_uses = max; } size_t cache_local() const { return local.value; } size_t cache_hits() const { return hits.value; } size_t cache_misses() const { return misses.value; } size_t background_syncs() const { return background_updates.value; } size_t cache_size() const { cache_lock.lock(); const size_t ret_val = cache.size(); cache_lock.unlock(); return ret_val; } bool is_cached(const key_type& key) const { cache_lock.lock(); const bool ret_value = cache.contains(key); cache_lock.unlock(); return ret_value; } value_type operator[](const key_type& key) { if(is_local(key)) { ++local; data_lock.lock(); const value_type value = data_map[key]; data_lock.unlock(); return value; } else { // on a remote machine check the cache // test for the key in the cache cache_lock.lock(); if(cache.contains(key)) { ++hits; const value_type ret_value = cache[key].value; cache_lock.unlock(); return ret_value; } else { // need to create a cache entry ++misses; // Free space in the cache if necessary while(cache.size() + 1 > max_cache_size) { ASSERT_GT(cache.size(), 0); const std::pair<key_type, cache_entry> pair = cache.evict(); const key_type& key = pair.first; const cache_entry& entry = pair.second; send_delta(key, entry.delta); } // get the new entry from the server const value_type ret_value = (cache[key].value = get_master(key)); cache_lock.unlock(); return ret_value; } } } // end of operator [] void apply_delta(const key_type& key, const delta_type& delta) { if(is_local(key)) { data_lock.lock(); data_map[key] += delta; data_lock.unlock(); } else { // update the cache entry if availablable cache_lock.lock(); 
if(cache.contains(key)) { cache_entry& entry = cache[key]; entry.value += delta; entry.delta += delta; if( entry.uses > max_uses ) { const delta_type accum_delta = entry.delta; entry.delta = delta_type(); entry.uses = 0; cache_lock.unlock(); send_delta(key, accum_delta); return; } } cache_lock.unlock(); } } //! empty the local cache void flush() { cache_lock.lock(); while(cache.size() > 0) { const std::pair<key_type, cache_entry> pair = cache.evict(); const key_type& key = pair.first; const cache_entry& entry = pair.second; send_delta(key, entry.delta); } cache_lock.unlock(); } //! empty the local cache void barrier_flush() { flush(); rpc.full_barrier(); } void synchronize() { typedef typename cache_type::pair_type pair_type; cache_lock.lock(); foreach(pair_type& pair, cache) { key_type& key = pair.first; cache_entry& entry = pair.second; if(entry.uses > 0) { const delta_type accum_delta = entry.delta; entry.delta = delta_type(); entry.uses = 0; send_delta(key, accum_delta); } } // end of foreach cache_lock.unlock(); } void synchronize(const key_type& key) { if(is_local(key)) return; cache_lock.lock(); if(cache.contains(key)) { cache_entry& entry = cache[key]; const delta_type accum_delta = entry.delta; entry.delta = delta_type(); entry.uses = 0; cache_lock.unlock(); send_delta(key, accum_delta); } else cache_lock.unlock(); } size_t owning_cpu(const key_type& key) const { const size_t hash_value = hash_function(key); const size_t cpuid = hash_value % rpc.numprocs(); return cpuid; } bool is_local(const key_type& key) const { return owning_cpu(key) == rpc.procid(); } // end of is local delta_type delta(const key_type& key) const { if(!is_local(key)) { cache_lock.lock(); if(cache.contains(key)) { const delta_type delta = cache[key].delta; cache_lock.unlock(); return delta; } cache_lock.unlock(); } return delta_type(); } size_t local_size() const { data_lock.lock(); const size_t result = data_map.size(); data_lock.unlock(); return result; } size_t size() const { size_t 
sum = 0; for(size_t i = 0; i < rpc.numprocs(); ++i) { if(i == rpc.procid()) sum += local_size(); else sum += rpc.remote_request(i, &delta_dht::local_size); } return sum; } size_t numprocs() const { return rpc.num_procs(); } size_t procid() const { return rpc.procid(); } value_type get_master(const key_type& key) { // If the data is stored locally just read and return if(is_local(key)) { data_lock.lock(); const value_type ret_value = data_map[key]; data_lock.unlock(); return ret_value; } else { return rpc.remote_request(owning_cpu(key), &delta_dht::get_master, key); } } // end of direct get private: void send_delta(const key_type& key, const delta_type& delta) { // If the data is stored locally just read and return ASSERT_FALSE(is_local(key)); const size_t calling_procid = procid(); rpc.remote_call(owning_cpu(key), &delta_dht::send_delta_rpc, calling_procid, key, delta); } // end of send_delta void send_delta_rpc(const size_t& calling_procid, const key_type& key, const delta_type& delta) { // If the data is stored locally just read and return ASSERT_TRUE(is_local(key)); data_lock.lock(); const value_type ret_value = (data_map[key] += delta); data_lock.unlock(); rpc.remote_call(calling_procid, &delta_dht::send_delta_rpc_callback, key, ret_value); } // end of send_delta_rpc void send_delta_rpc_callback(const key_type& key, const value_type& new_value) { // If the data is stored locally just read and return ASSERT_FALSE(is_local(key)); cache_lock.lock(); if(cache.contains(key)) { cache_entry& entry = cache[key]; entry.value = new_value; entry.value += entry.delta; } ++background_updates; cache_lock.unlock(); } // end of send_delta_rpc_callback // void synchronize(const key_type& key, cache_entry& entry) { // const value_type delta = entry.current - entry.old; // entry.old = synchronize_rpc(key, delta); // entry.current = entry.old; // } // end of synchronize // value_type synchronize_rpc(const key_type& key, const value_type& delta) { // if(is_local(key)) { // 
data_lock.lock(); // typename data_map_type::iterator iter = data_map.find(key); // ASSERT_TRUE(iter != data_map.end()); // const value_type ret_value = (iter->second += delta); // data_lock.unlock(); // return ret_value; // } else { // return rpc.remote_request(owning_cpu(key), // &delta_dht::synchronize_rpc, // key, delta); // } // } // end of synchronize_rpc }; // end of delta_dht }; // end of namespace graphlab #include <graphlab/macros_undef.hpp> #endif ================================================ FILE: src/graphlab/rpc/dht.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_DHT_HPP #define GRAPHLAB_DHT_HPP #include <boost/functional/hash.hpp> #include <boost/unordered_map.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/rpc/dc_dist_object.hpp> namespace graphlab { /** * \ingroup rpc * Implements a very rudimentary distributed key value store. 
*/ template <typename KeyType, typename ValueType> class dht { public: typedef boost::unordered_map<size_t, ValueType> storage_type; private: mutable dc_dist_object< dht > rpc; boost::hash<KeyType> hasher; mutex lock; storage_type storage; public: dht(distributed_control &dc) : rpc(dc, this) { } /** * Get the owner of the key */ procid_t owner(const KeyType& key) const { return hasher(key) % rpc.dc().numprocs(); } /** * gets the value associated with a key. * Returns (true, Value) if the entry is available. * Returns (false, undefined) otherwise. */ std::pair<bool, ValueType> get(const KeyType &key) const { // who owns the data? const size_t hashvalue = hasher(key); const size_t owningmachine = hashvalue % rpc.numprocs(); std::pair<bool, ValueType> retval; // if it is me, we can return it if (owningmachine == rpc.dc().procid()) { lock.lock(); typename storage_type::const_iterator iter = storage.find(hashvalue); retval.first = iter != storage.end(); if (retval.first) retval.second = iter->second; lock.unlock(); } else { retval = rpc.remote_request(owningmachine, &dht<KeyType,ValueType>::get, key); } return retval; } /** * gets the value associated with a key. * Returns (true, Value) if the entry is available. * Returns (false, undefined) otherwise. */ request_future<std::pair<bool, ValueType> > get_future(const KeyType &key) const { // who owns the data? 
const size_t hashvalue = hasher(key); const size_t owningmachine = hashvalue % rpc.numprocs(); std::pair<bool, ValueType> retval; // if it is me, we can return it if (owningmachine == rpc.dc().procid()) { lock.lock(); typename storage_type::const_iterator iter = storage.find(hashvalue); retval.first = iter != storage.end(); if (retval.first) retval.second = iter->second; lock.unlock(); return retval; } else { return rpc.future_remote_request(owningmachine, &dht<KeyType,ValueType>::get, key); } } /** * Sets the newval to be the value associated with the key */ void set(const KeyType &key, const ValueType &newval) { // who owns the data? const size_t hashvalue = hasher(key); const size_t owningmachine = hashvalue % rpc.numprocs(); // if it is me, set it if (owningmachine == rpc.dc().procid()) { lock.lock(); storage[hashvalue] = newval; lock.unlock(); } else { rpc.remote_call(owningmachine, &dht<KeyType,ValueType>::set, key, newval); } } void print_stats() const { std::cerr << rpc.calls_sent() << " calls sent\n"; std::cerr << rpc.calls_received() << " calls received\n"; } /** Must be called by all machines simultaneously */ void clear() { rpc.barrier(); storage.clear(); } }; }; #endif ================================================ FILE: src/graphlab/rpc/distributed_event_log.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#include <pthread.h>
#include <string>
#include <limits>
#include <cfloat>
#include <graphlab/rpc/dc.hpp>
#include <graphlab/rpc/dc_dist_object.hpp>
#include <graphlab/rpc/distributed_event_log.hpp>
#include <graphlab/util/timer.hpp>
#include <graphlab/logger/assertions.hpp>
#include <graphlab/util/dense_bitset.hpp>
#include <graphlab/ui/metrics_server.hpp>
#include <graphlab/macros_def.hpp>

// NOTE(review): defined unconditionally — set_dc() guards the launch of
// the periodic tick thread with #ifndef DISABLE_DISTRIBUTED_EVENT_LOG,
// so event collection is compiled out while this define is present.
#define DISABLE_DISTRIBUTED_EVENT_LOG

namespace graphlab {

// predeclaration of the metric server handlers
static std::pair<std::string, std::string>
metric_names_json(std::map<std::string, std::string>& vars);

static std::pair<std::string, std::string>
metric_aggregate_json(std::map<std::string, std::string>& vars);

static std::pair<std::string, std::string>
metric_by_machine_json(std::map<std::string, std::string>& vars);

// converts a time in seconds to a record index (5 second buckets)
static size_t time_to_index(double t) {
  return std::floor(t / 5);
}
// inverse of time_to_index: record index back to seconds
static double index_to_time(size_t t) {
  return 5 * t;
}

/**
 * Assigns a free slot in the logs[] array to the given log group and
 * marks it used in the has_log_entry bitset. Aborts via LOG_FATAL when
 * all slots are taken. Returns the allocated slot id.
 */
size_t distributed_event_logger::allocate_log_entry(log_group* group) {
  log_entry_lock.lock();
  size_t id = 0;
  if (has_log_entry.first_zero_bit(id) == false) {
    logger(LOG_FATAL, "More than 256 Log entries created. "
                      "New log entries cannot be created");
    // does not return
  }
  logs[id] = group;
  has_log_entry.set_bit(id);
  log_entry_lock.unlock();
  return id;
}

/**
 * Returns the calling thread's counter array, creating and registering
 * it (in pthread TLS and in thread_local_count[]) on first use.
 * Aborts via LOG_FATAL when all thread slots are occupied.
 */
event_log_thread_local_type*
distributed_event_logger::get_thread_counter_ref() {
  void* v = pthread_getspecific(key);
  if (v == NULL) {
    // allocate a new thread local entry
    event_log_thread_local_type* entry = new event_log_thread_local_type;
    // set all values to 0
    for (size_t i = 0; i < MAX_LOG_SIZE; ++i) entry->values[i] = 0;
    // cast and write it to v. We need it later.
    // and set the thread local store
    v = (void*)(entry);
    pthread_setspecific(key, v);

    // register the key entry against the logger
    thread_local_count_lock.lock();
    // find an unused entry
    size_t b = 0;
    if (thread_local_count_slots.first_zero_bit(b) == false) {
      logger(LOG_FATAL, "More than 1024 active threads. "
                        "Log counters cannot be created");
      // does not return
    }
    entry->thlocal_slot = b;
    thread_local_count[b] = entry;
    thread_local_count_slots.set_bit(b);
    thread_local_count_lock.unlock();
  }
  event_log_thread_local_type* entry = (event_log_thread_local_type*)(v);
  return entry;
}

/**
 * Receives the log information from each machine.
 * Runs on machine 0: stores srccounts[log] at record index record_ctr in
 * the per-machine history of every active log, padding any machine's
 * history with its last known value so all rows reach record_ctr.
 */
void distributed_event_logger::rpc_collect_log(size_t srcproc,
                                               size_t record_ctr,
                                               std::vector<double> srccounts) {
  foreach(size_t log, has_log_entry) {
    logs[log]->lock.lock();
    // insert the new counts
    size_t entryid = record_ctr;
    // track the earliest record touched so build_aggregate_log() knows
    // where to resume summing
    logs[log]->earliest_modified_log =
        std::min(entryid, logs[log]->earliest_modified_log);
    logs[log]->machine_log_modified = true;
    // resize all procs, carrying each machine's last value forward
    for (procid_t p = 0; p < logs[log]->machine.size(); ++p) {
      if (logs[log]->machine[p].size() < entryid + 1) {
        double prevvalue = 0;
        if (logs[log]->machine[p].size() > 0) {
          prevvalue = logs[log]->machine[p].back().value;
        }
        logs[log]->machine[p].resize(entryid + 1, log_entry(prevvalue));
      }
    }
    logs[log]->machine[srcproc][entryid].value = srccounts[log];
    logs[log]->lock.unlock();
  }
}

/**
 * Samples every INSTANTANEOUS log once: adds the current reading (from
 * the log's callback, or summed across all threads' counters) into a
 * running sum/count pair that local_collect_log() later averages.
 */
void distributed_event_logger::collect_instantaneous_log() {
  foreach(size_t log, has_log_entry) {
    if (logs[log]->logtype == log_type::INSTANTANEOUS) {
      logs[log]->lock.lock();
      // for each log entry which is a callback entry
      // call the callback to get the counts
      if (logs[log]->is_callback_entry) {
        logs[log]->sum_of_instantaneous_entries += logs[log]->callback();
        ++logs[log]->count_of_instantaneous_entries;
      } else {
        // sum it across all the threads
        foreach(size_t thr, thread_local_count_slots) {
          logs[log]->sum_of_instantaneous_entries +=
              thread_local_count[thr]->values[log];
        }
++logs[log]->count_of_instantaneous_entries;
      }
      logs[log]->lock.unlock();
    }
  }
}

/**
 * Collects the machine level
 * log entry. and sends it to machine 0
 */
void distributed_event_logger::local_collect_log(size_t record_ctr) {
  // put together an aggregate of all counters
  std::vector<double> combined_counts(MAX_LOG_SIZE, 0);

  // for each thread and for each log entry which is
  // not a callback entry. Accumulate the number of counts
  //
  foreach(size_t log, has_log_entry) {
    logs[log]->lock.lock();
    // cumulative entry. just add across all threads
    if (logs[log]->logtype == log_type::CUMULATIVE) {
      if (logs[log]->is_callback_entry) {
        combined_counts[log] = logs[log]->callback();
      } else {
        foreach(size_t thr, thread_local_count_slots) {
          size_t* current_thread_counts = thread_local_count[thr]->values;
          combined_counts[log] += current_thread_counts[log];
        }
      }
    } else {
      // take the average of the samples gathered by
      // collect_instantaneous_log() since the last record, then reset
      // the sum/count accumulators
      if (logs[log]->count_of_instantaneous_entries > 0) {
        combined_counts[log] =
            (double)logs[log]->sum_of_instantaneous_entries /
            logs[log]->count_of_instantaneous_entries;
      } else {
        combined_counts[log] = 0;
      }
      logs[log]->sum_of_instantaneous_entries = 0;
      logs[log]->count_of_instantaneous_entries = 0;
    }
    logs[log]->lock.unlock();
  }
  // send to machine 0 (machine 0 records its own counts directly)
  if (rmi->procid() != 0) {
    rmi->control_call(0,
                      &distributed_event_logger::rpc_collect_log,
                      (size_t)rmi->procid(), record_ctr, combined_counts);
  } else {
    rpc_collect_log((size_t)0, record_ctr, combined_counts);
  }
}

// Called only by machine 0 to get the aggregate log.
// Re-sums the per-machine histories (from the earliest modified record
// onwards) into the cross-machine aggregate series of each dirty log.
void distributed_event_logger::build_aggregate_log() {
  ASSERT_EQ(rmi->procid(), 0);
  foreach(size_t log, has_log_entry) {
    logs[log]->lock.lock();
    if (logs[log]->machine_log_modified) {
      // what is the previous time the aggregate was computed?
      // The sum takes the open interval (prevtime, current_time]
      // thus the first time this is called, we may drop one entry
      // if we let prevtime initialize at 0
      size_t prevtime = logs[log]->earliest_modified_log;
      size_t lasttime = prevtime + 1;
      for (procid_t p = 0; p < logs[log]->machine.size(); ++p) {
        lasttime = std::max(lasttime, logs[log]->machine[p].size());
      }
      // if it is a CUMULATIVE log, take the latest entry from each machine
      // if it is an INSTANTANEOUS log, take the average of the last times.
      if (logs[log]->aggregate.size() < lasttime) {
        if (logs[log]->logtype == log_type::CUMULATIVE) {
          // pad with the last aggregate value so gaps carry forward
          double lastval = 0;
          if (logs[log]->aggregate.size() > 0) {
            lastval = logs[log]->aggregate.rbegin()->value;
          }
          logs[log]->aggregate.resize(lasttime, log_entry(lastval));
        } else {
          logs[log]->aggregate.resize(lasttime);
        }
      }
      // re-sum every record from prevtime onwards across machines
      for (size_t t = prevtime; t < lasttime; ++t) {
        double sum = 0;
        for (procid_t p = 0; p < logs[log]->machine.size(); ++p) {
          if (t < logs[log]->machine[p].size()) {
            sum += logs[log]->machine[p][t].value;
          }
        }
        logs[log]->aggregate[t].value = sum;
      }
      // mark clean: nothing modified until the next rpc_collect_log()
      logs[log]->earliest_modified_log = (size_t)(-1);
      logs[log]->machine_log_modified = false;
    }
    logs[log]->lock.unlock();
  }
}

// Body of the periodic tick thread: samples instantaneous logs every
// TICK_FREQUENCY seconds and emits a record (and, on machine 0, the
// aggregate) every RECORD_FREQUENCY seconds, until destroy_event_logger()
// sets periodic_timer_stop and signals the condition variable.
void distributed_event_logger::periodic_timer() {
  periodic_timer_lock.lock();
  timer ti; ti.start();
  int tick_ctr = 0;
  int record_ctr = 0;
  int ticks_per_record = RECORD_FREQUENCY / TICK_FREQUENCY;
  while (!periodic_timer_stop){
    collect_instantaneous_log();
    if (tick_ctr % ticks_per_record == 0) {
      local_collect_log(record_ctr);
      ++record_ctr;
      if (rmi->procid() == 0) build_aggregate_log();
    }
    // when is the next tick
    ++tick_ctr;
    int nexttick_time = tick_ctr * 1000 * TICK_FREQUENCY;
    int nexttick_interval = nexttick_time - ti.current_time_millis();
    // we lost a tick.
if (nexttick_interval < 10) continue; periodic_timer_cond.timedwait_ms(periodic_timer_lock, nexttick_interval); } periodic_timer_lock.unlock(); } distributed_event_logger::distributed_event_logger():rmi(NULL) { pthread_key_create(&key, NULL); // clear the bit fields has_log_entry.clear(); thread_local_count_slots.clear(); periodic_timer_stop = false; } void distributed_event_logger::destroy_event_logger() { // kill the tick thread bool thread_was_started = false; periodic_timer_lock.lock(); // if periodic_timer_stop is false, then // thread was started. signal it and wait for it later to // join if (periodic_timer_stop == false) { periodic_timer_stop = true; thread_was_started = true; periodic_timer_cond.signal(); } periodic_timer_lock.unlock(); if (thread_was_started) tick_thread.join(); // make sure everyone has joined before I start freeing stuff rmi->full_barrier(); delete rmi; pthread_key_delete(key); // here also free all the allocated memory! foreach(size_t thr, thread_local_count_slots) { if (thread_local_count[thr] != NULL) delete thread_local_count[thr]; } foreach(size_t log, has_log_entry) { if (logs[log] != NULL) delete logs[log]; } } void distributed_event_logger::set_dc(distributed_control& dc) { if (rmi == NULL) { rmi = new dc_dist_object<distributed_event_logger>(dc, this); // register a deletion callback since the distributed_event_logger // will be destroyed only after main dc.register_deletion_callback(boost::bind( &distributed_event_logger::destroy_event_logger, this)); dc.barrier(); // everyone starts the timer at the same time // at the one distributed synchronization point we have ti.start(); // procid 0 waits 0.2s to skew the local timer a little // so everyone else's log has time to show up if (rmi->procid() == 0) { timer::sleep_ms(200); } periodic_timer_stop = false; // spawn a thread for the tick #ifndef DISABLE_DISTRIBUTED_EVENT_LOG tick_thread.launch(boost::bind(&distributed_event_logger::periodic_timer, this)); #endif // register the 
metric server callbacks add_metric_server_callback("names.json", metric_names_json); add_metric_server_callback("metrics_aggregate.json", metric_aggregate_json); add_metric_server_callback("metrics_by_machine.json", metric_by_machine_json); } } size_t distributed_event_logger::create_log_entry(std::string name, std::string units, log_type::log_type_enum logtype) { // look for an entry with the same name bool has_existing = false; size_t existingid = 0; log_entry_lock.lock(); foreach(size_t log, has_log_entry) { if (logs[log]->name == name) { ASSERT_MSG(logs[log]->is_callback_entry == false, "Cannot convert callback log to counter log"); has_existing = true; existingid = log; break; } } log_entry_lock.unlock(); if (has_existing) return existingid; log_group* group = new log_group; group->logtype = logtype; group->name = name; group->units = units; group->callback = NULL; group->is_callback_entry = false; group->earliest_modified_log = 1; group->machine_log_modified = false; group->sum_of_instantaneous_entries = 0.0; group->count_of_instantaneous_entries = 0; // only allocate the machine vector on the root machine. // no one else needs it if (rmi->procid() == 0) { group->machine.resize(rmi->numprocs()); } // ok. get an ID size_t id = allocate_log_entry(group); // enforce that all machines are running this at the same time rmi->barrier(); return id; } size_t distributed_event_logger::create_callback_entry(std::string name, std::string units, boost::function<double(void)> callback, log_type::log_type_enum logtype) { bool has_existing = false; size_t existingid = 0; log_entry_lock.lock(); foreach(size_t log, has_log_entry) { if (logs[log]->name == name) { has_existing = true; existingid = log; break; } } log_entry_lock.unlock(); if (has_existing) { // ok... we have an existing entry. 
We may // overwrite the callback if the callback is NULL ASSERT_MSG(logs[existingid]->is_callback_entry == true, "Cannot convert counter log to callback log"); logs[existingid]->lock.lock(); ASSERT_MSG(logs[existingid]->callback == NULL, "Cannot create another callback log entry with" "the same name %s", name.c_str()); logs[existingid]->callback = callback; logs[existingid]->lock.unlock(); return existingid; } log_group* group = new log_group; group->logtype = logtype; group->name = name; group->units = units; group->earliest_modified_log = 0; group->machine_log_modified = false; group->callback = callback; group->is_callback_entry = true; group->sum_of_instantaneous_entries = 0.0; group->count_of_instantaneous_entries = 0; // only allocate the machine vector on the root machine. // no one else needs it if (rmi->procid() == 0) { group->machine.resize(rmi->numprocs()); } // ok. get an ID size_t id = allocate_log_entry(group); // enforce that all machines are running this at the same time rmi->barrier(); return id; } void distributed_event_logger::thr_inc_log_entry(size_t entry, size_t value) { event_log_thread_local_type* ev = get_thread_counter_ref(); ASSERT_LT(entry, MAX_LOG_SIZE); ASSERT_EQ(logs[entry]->is_callback_entry, false); ev->values[entry] += value; } void distributed_event_logger::thr_dec_log_entry(size_t entry, size_t value) { event_log_thread_local_type* ev = get_thread_counter_ref(); ASSERT_LT(entry, MAX_LOG_SIZE); // does not work for cumulative logs ASSERT_NE((int)logs[entry]->logtype, (int) log_type::CUMULATIVE); ASSERT_EQ(logs[entry]->is_callback_entry, false); ev->values[entry] -= value; } void distributed_event_logger::free_callback_entry(size_t entry) { ASSERT_LT(entry, MAX_LOG_SIZE); // does not work for cumulative logs logs[entry]->lock.lock(); ASSERT_EQ(logs[entry]->is_callback_entry, true); logs[entry]->callback = NULL; logs[entry]->lock.unlock(); } distributed_event_logger& get_event_log() { static distributed_event_logger dist_event_log; 
return dist_event_log; } /* Used to process the names.json request */ std::pair<std::string, std::string> static metric_names_json(std::map<std::string, std::string>& vars) { std::stringstream strm; char *pname = getenv("_"); std::string progname; if (pname != NULL) progname = pname; distributed_event_logger& evlog = get_event_log(); log_group** logs = evlog.get_logs_ptr(); fixed_dense_bitset<MAX_LOG_SIZE>& has_log_entry = evlog.get_logs_bitset(); strm << "{\n" << " \"program_name\": \""<< progname << "\",\n" << " \"time\": " << evlog.get_current_time() << ",\n" << " \"metrics\": [\n"; // output the metrics size_t nlogs = has_log_entry.popcount(); size_t logcount = 0; foreach(size_t log, has_log_entry) { logs[log]->lock.lock(); double rate_val = 0; size_t len = logs[log]->aggregate.size(); if (len >= 1) { double logtime = index_to_time(logs[log]->aggregate.size() - 1); double logval = logs[log]->aggregate.rbegin()->value; double prevtime = 0; double prevval = 0; if (logs[log]->aggregate.size() >= 2) { prevtime = index_to_time(logs[log]->aggregate.size() - 2); prevval = logs[log]->aggregate[len - 2].value; } if (logs[log]->logtype == log_type::CUMULATIVE) { rate_val = (logval - prevval) / (logtime - prevtime); } else { rate_val = logval; } } strm << " {\n" << " \"id\":" << log << ",\n" << " \"name\": \"" << logs[log]->name << "\",\n" << " \"units\": \"" << logs[log]->units << "\",\n" << " \"cumulative\": " << (int)(logs[log]->logtype) << ",\n" << " \"rate_val\": " << rate_val << ",\n" << " \"value\": " << ( logs[log]->aggregate.size() > 0 ? 
logs[log]->aggregate.rbegin()->value : 0 ) << "\n" << " }\n"; logs[log]->lock.unlock(); ++logcount; if (logcount < nlogs) strm << ","; } strm << " ]\n" << "}\n"; return std::make_pair(std::string("text/plain"), strm.str()); } std::pair<std::string, std::string> static metric_aggregate_json(std::map<std::string, std::string>& vars) { double tstart = 0; double tend = DBL_MAX; bool rate = false; std::string name; // see what variables there are size_t idxstart = time_to_index(tstart); size_t idxend = (size_t)(-1); if (vars.count("name")) name = vars["name"]; if (vars.count("tstart")) { tstart = atof(vars["tstart"].c_str()); idxstart = time_to_index(tstart); } if (vars.count("tend")) { tend = atof(vars["tend"].c_str()); idxend = time_to_index(tend) + 1; } if (vars.count("rate")) rate = (atoi(vars["rate"].c_str()) != 0); if (vars.count("tlast")) { double tlast = atof(vars["tlast"].c_str()); tstart = get_event_log().get_current_time() - tlast; tstart = tstart < 0.0 ? 0.0 : tstart; tend = get_event_log().get_current_time(); idxstart = time_to_index(tstart); idxend = time_to_index(tend) + 1; } // name is not optional name = trim(name); distributed_event_logger& evlog = get_event_log(); log_group** logs = evlog.get_logs_ptr(); fixed_dense_bitset<MAX_LOG_SIZE>& has_log_entry = evlog.get_logs_bitset(); std::stringstream strm; size_t nlogs = has_log_entry.popcount(); size_t logcount = 0; // if name is empty, I should extract all metrics bool extract_all = (name.length() == 0); // make a top level array strm << "[\n"; foreach(size_t log, has_log_entry) { if (logs[log]->name == name || extract_all) { logs[log]->lock.lock(); strm << " {\n" << " \"id\":" << log << ",\n" << " \"name\": \"" << logs[log]->name << "\",\n" << " \"units\": \"" << logs[log]->units << "\",\n" << " \"name\": \"" << logs[log]->name << "\",\n" << " \"cumulative\": " << (int)(logs[log]->logtype) << ",\n" << " \"record\": ["; std::vector<std::pair<double, double> > output_entries; // annoyingly, json does not 
let me put a trailing comma in the array. // thus I need to first write it to a vector, before dumping it to json size_t log_idxend = std::min(idxend, logs[log]->aggregate.size()); for (size_t i = idxstart; i < log_idxend ; ++i) { double logtime = index_to_time(i); double logval = logs[log]->aggregate[i].value; // only cumulative logs can have rate if (rate == 0 || logs[log]->logtype == log_type::INSTANTANEOUS) { output_entries.push_back(std::make_pair(logtime, logval)); } else { double prevval = 0; double prevtime = 0; if (i > 0) { prevtime = index_to_time(i - 1); prevval = logs[log]->aggregate[i - 1].value; } double currate = 0; // avoid divide by zero annoyances if (logtime > prevtime) { currate = (logval - prevval) / (logtime - prevtime); } output_entries.push_back(std::make_pair(logtime, currate)); } } logs[log]->lock.unlock(); for (size_t i = 0 ;i < output_entries.size(); ++i) { strm << " [" << output_entries[i].first << ", " << output_entries[i].second << "] "; // add a comma if this is not the last entry if (i < output_entries.size() - 1) strm << ", "; } strm << "]\n" << " }\n"; // if I am not supposed to extract all, then I am done here. 
if (!extract_all) break; ++logcount; if (logcount < nlogs) strm << ",\n"; } } strm << "]\n"; return std::make_pair(std::string("text/plain"), strm.str()); } std::pair<std::string, std::string> static metric_by_machine_json(std::map<std::string, std::string>& vars) { double tstart = 0; double tend = DBL_MAX; bool rate = false; std::string name; size_t machine = 0; bool has_machine_filter = false; // see what variables there are size_t idxstart = 0; size_t idxend = (size_t)(-1); if (vars.count("name")) name = vars["name"]; if (vars.count("machine")) { has_machine_filter = true; machine = atoi(vars["machine"].c_str()); } if (vars.count("tstart")) { tstart = atof(vars["tstart"].c_str()); idxstart = time_to_index(tstart); } if (vars.count("tend")) { tend = atof(vars["tend"].c_str()); idxend = time_to_index(tend) + 1; } if (vars.count("rate")) rate = (atoi(vars["rate"].c_str()) != 0); if (vars.count("tlast")) { double tlast = atof(vars["tlast"].c_str()); tstart = get_event_log().get_current_time() - tlast; tstart = tstart < 0.0 ? 
0.0 : tstart;
    tend = get_event_log().get_current_time();
    idxstart = time_to_index(tstart);
    idxend = time_to_index(tend) + 1;
  }
  // name is not optional
  name = trim(name);
  distributed_event_logger& evlog = get_event_log();
  log_group** logs = evlog.get_logs_ptr();
  fixed_dense_bitset<MAX_LOG_SIZE>& has_log_entry = evlog.get_logs_bitset();
  std::stringstream strm;
  size_t nlogs = has_log_entry.popcount();
  size_t logcount = 0;
  // if name is empty, I should extract all metrics
  bool extract_all = (name.length() == 0);
  // make a top level array
  strm << "[\n";
  foreach(size_t log, has_log_entry) {
    if (logs[log]->name == name || extract_all) {
      logs[log]->lock.lock();
      strm << " {\n"
           << " \"id\":" << log << ",\n"
           << " \"name\": \"" << logs[log]->name << "\",\n"
           << " \"units\": \"" << logs[log]->units << "\",\n"
           << " \"cumulative\": " << (int)(logs[log]->logtype) << ",\n"
           << " \"record\": ";
      std::vector<std::vector<std::pair<double, double> > > all_output_entries;
      // annoyingly, json does not let me put a trailing comma in the array.
      // thus I need to first write it to a vector, before dumping it to json
      // and annoying 2 dimensional output arrays...
      //
      size_t p_start = 0;
      size_t p_end = logs[log]->machine.size();
      // optional ?machine= filter restricts output to one machine
      if (has_machine_filter) {
        p_start = machine;
        p_end = machine + 1;
      }
      for (size_t p = p_start; p < p_end; ++p) {
        std::vector<log_entry>& current = logs[log]->machine[p];
        std::vector<std::pair<double, double> > output_entries;
        size_t log_idxend = std::min(idxend, current.size());
        for (size_t i = idxstart; i < log_idxend; ++i) {
          double logtime = index_to_time(i);
          double logval = current[i].value;
          if (logtime > tstart && logtime <= tend) {
            // only cumulative logs can have rate
            if (rate == 0 || logs[log]->logtype == log_type::INSTANTANEOUS) {
              output_entries.push_back(std::make_pair(logtime, logval));
            } else {
              double prevval = 0;
              double prevtime = 0;
              if (i > 0) {
                prevtime = index_to_time(i - 1);
                prevval = current[i - 1].value;
              }
              double currate = 0;
              // avoid divide by zero annoyances
              if (logtime > prevtime) {
                currate = (logval - prevval) / (logtime - prevtime);
              }
              output_entries.push_back(std::make_pair(logtime, currate));
            }
          }
        }
        all_output_entries.push_back(output_entries);
      }
      logs[log]->lock.unlock();
      // dump the collected 2D array as JSON: one inner array per machine
      strm << "[ ";
      for (size_t p = 0; p < all_output_entries.size(); ++p) {
        std::vector<std::pair<double, double> >& output_entries = all_output_entries[p];
        strm << "[ ";
        for (size_t i = 0 ;i < output_entries.size(); ++i) {
          strm << " [" << output_entries[i].first << ", "
               << output_entries[i].second << "] ";
          // add a comma if this is not the last entry
          if (i < output_entries.size() - 1) strm << ", ";
        }
        strm << "] ";
        if (p < all_output_entries.size() - 1) strm << ", ";
      }
      strm << "]\n" << " }\n";
      // if I am not supposed to extract all, then I am done here.
      if (!extract_all) break;
      ++logcount;
      if (logcount < nlogs) strm << ",\n";
    }
  }
  strm << "]\n";
  return std::make_pair(std::string("text/plain"), strm.str());
}

} // namespace graphlab



================================================
FILE: src/graphlab/rpc/distributed_event_log.hpp
================================================

/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_DISTRIBUTED_EVENT_LOG_HPP
#define GRAPHLAB_DISTRIBUTED_EVENT_LOG_HPP
#include <iostream>
#include <string>
#include <vector>
#include <boost/bind.hpp>
#include <boost/function.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/parallel/atomic.hpp>
#include <graphlab/util/timer.hpp>
#include <graphlab/util/dense_bitset.hpp>
#include <graphlab/util/stl_util.hpp>
namespace graphlab {

// forward declaration because we need this in the
// class but we want dc_dist_object to be able
// to use this class too.
template <typename T> class dc_dist_object;
class distributed_control;

// maximum number of distinct log entries
const size_t MAX_LOG_SIZE = 256;
// maximum number of threads that may hold per-thread counters
const size_t MAX_LOG_THREADS = 1024;
// seconds between instantaneous samples
const double TICK_FREQUENCY = 0.5;
// seconds between records shipped to machine 0
const double RECORD_FREQUENCY = 5.0;

/// A single entry in time
struct log_entry: public IS_POD_TYPE {
  // The value at the time.
  // If this is a CUMULATIVE entry, this
  // will contain the total number of events since the start
  double value;
  explicit log_entry(double value = 0): value(value) { }
};

namespace log_type {
  enum log_type_enum {
    INSTANTANEOUS = 0, ///< Sum of log values over time are not meaningful
    CUMULATIVE = 1     ///< Sum of log values over time are meaningful
  };
}

/// Logging information for a particular log entry (say \#updates)
struct log_group{
  mutex lock;
  /// name of the group
  std::string name;
  /// unit of measurement
  std::string units;
  /// Set to true if this is a callback entry
  bool is_callback_entry;
  /// The type of log. Instantaneous or Cumulative
  log_type::log_type_enum logtype;
  // periodically-invoked sampling callback (callback entries only)
  boost::function<double(void)> callback;
  // accumulators used to time-average INSTANTANEOUS samples between records
  size_t sum_of_instantaneous_entries;
  size_t count_of_instantaneous_entries;
  // true when machine[] received new data since the last aggregate rebuild
  bool machine_log_modified;
  // earliest record index touched since the last aggregate rebuild
  size_t earliest_modified_log;
  /// machine[i] holds a vector of entries from machine i
  std::vector<std::vector<log_entry> > machine;
  /// aggregate holds vector of totals
  std::vector<log_entry> aggregate;
};

/**
 * This is the type that is held in the thread local store
 */
struct event_log_thread_local_type {
  /** The values written to by each thread.
   * An array with max length MAX_LOG_SIZE
   */
  size_t values[MAX_LOG_SIZE];
  // index of this thread's slot in distributed_event_logger's slot table
  size_t thlocal_slot;
  // These are used for time averaging instantaneous values
};

class distributed_event_logger {
 private:
  // a key to allow multiple threads, each to have their
  // own counter. Decreases performance penalty of
  // the event logger.
  pthread_key_t key;

  dc_dist_object<distributed_event_logger>* rmi;

  // The array of logs. We can only have a maximum of MAX_LOG_SIZE logs
  // This is only created on machine 0
  log_group* logs[MAX_LOG_SIZE];
  // this bit field is used to identify which log entries are active
  fixed_dense_bitset<MAX_LOG_SIZE> has_log_entry;
  mutex log_entry_lock;

  // A collection of slots, one for each thread, to hold
  // the current thread's active log counter.
  // Threads will write directly into here
  // and a master timer will sum it all up periodically
  event_log_thread_local_type* thread_local_count[MAX_LOG_THREADS];
  // a bitset which lets me identify which slots in thread_local_counts
  // are used.
  fixed_dense_bitset<MAX_LOG_THREADS> thread_local_count_slots;
  mutex thread_local_count_lock;

  // timer managing the frequency at which logs are transmitted to the root
  timer ti;
  thread tick_thread;

  size_t allocate_log_entry(log_group* group);

  /**
   * Returns a pointer to the current thread log counter
   * creating one if one does not already exist.
   */
  event_log_thread_local_type* get_thread_counter_ref();

  /**
   * Receives the log information from each machine
   */
  void rpc_collect_log(size_t srcproc, size_t record_ctr,
                       std::vector<double> srccounts);

  void collect_instantaneous_log();

  /**
   * Collects the machine level
   * log entry. and sends it to machine 0
   */
  void local_collect_log(size_t record_ctr);

  // Called only by machine 0 to get the aggregate log
  void build_aggregate_log();

  mutex periodic_timer_lock;
  conditional periodic_timer_cond;
  bool periodic_timer_stop;

  /** a new thread spawns here and sleeps for 5 seconds at a time
   * when it wakes up it will insert log entries
   */
  void periodic_timer();

 public:
  distributed_event_logger();

  // called by the destruction of distributed_control
  void destroy_event_logger();

  /**
   * Associates the event log with a DC object.
   * Must be called by all machines simultaneously.
   * Can be called more than once, but only the first call will have
   * an effect.
   */
  void set_dc(distributed_control& dc);

  /**
   * Creates a new log entry with a given name and log type.
   * Returns the ID of the log. Must be called by
   * all machines simultaneously with the same settings.
   * units is the unit of measurement.
   */
  size_t create_log_entry(std::string name, std::string units,
                          log_type::log_type_enum logtype);

  /**
   * Creates a new callback log entry with a given name and log type.
   * Returns the ID of the log. Must be called by
   * all machines simultaneously with the same settings.
   * units is the unit of measurement.
   * Callback will be triggered periodically.
   * Callback entries must be deleted once the callback goes
   * out of scope.
   */
  size_t create_callback_entry(std::string name, std::string units,
                               boost::function<double(void)> callback,
                               log_type::log_type_enum logtype);

  void free_callback_entry(size_t entry);

  /**
   * Increments the value of a log entry
   */
  void thr_inc_log_entry(size_t entry, size_t value);

  /**
   * Decrements the value of a log entry
   */
  void thr_dec_log_entry(size_t entry, size_t value);

  /// \cond GRAPHLAB_INTERNAL
  inline double get_current_time() const {
    return ti.current_time();
  }

  inline log_group** get_logs_ptr() {
    return logs;
  }

  inline fixed_dense_bitset<MAX_LOG_SIZE>& get_logs_bitset() {
    return has_log_entry;
  }
  /// \endcond
};

extern distributed_event_logger& get_event_log();

} // namespace graphlab

#define DECLARE_EVENT(name) size_t name;

#define INITIALIZE_EVENT_LOG(dc) graphlab::get_event_log().set_dc(dc);

#define ADD_CUMULATIVE_EVENT(name, desc, units) \
  name = graphlab::get_event_log().create_log_entry(desc, units, graphlab::log_type::CUMULATIVE);

#define ADD_INSTANTANEOUS_EVENT(name, desc, units) \
  name = graphlab::get_event_log().create_log_entry(desc, units, graphlab::log_type::INSTANTANEOUS);

#define ADD_CUMULATIVE_CALLBACK_EVENT(name, desc, units, callback) \
  name = graphlab::get_event_log().create_callback_entry(desc, units, callback, \
  graphlab::log_type::CUMULATIVE);

#define ADD_INSTANTANEOUS_CALLBACK_EVENT(name, desc, units, callback) \
  name = graphlab::get_event_log().create_callback_entry(desc, units, callback, \
  graphlab::log_type::INSTANTANEOUS);

#define FREE_CALLBACK_EVENT(name) \
  graphlab::get_event_log().free_callback_entry(name);

#define INCREMENT_EVENT(name, count) graphlab::get_event_log().thr_inc_log_entry(name, count);

#define DECREMENT_EVENT(name, count) graphlab::get_event_log().thr_dec_log_entry(name, count);

#endif
================================================ FILE: src/graphlab/rpc/evwrapdef.h ================================================ #define event_active gl_event_active #define event_active_nolock gl_event_active_nolock #define event_add gl_event_add #define event_assign gl_event_assign #define event_base_add_virtual gl_event_base_add_virtual #define event_base_assert_ok gl_event_base_assert_ok #define event_base_del_virtual gl_event_base_del_virtual #define event_base_dispatch gl_event_base_dispatch #define event_base_dump_events gl_event_base_dump_events #define event_base_free gl_event_base_free #define event_base_get_deferred_cb_queue gl_event_base_get_deferred_cb_queue #define event_base_get_features gl_event_base_get_features #define event_base_get_method gl_event_base_get_method #define event_base_gettimeofday_cached gl_event_base_gettimeofday_cached #define event_base_got_break gl_event_base_got_break #define event_base_got_exit gl_event_base_got_exit #define event_base_init_common_timeout gl_event_base_init_common_timeout #define event_base_loop gl_event_base_loop #define event_base_loopbreak gl_event_base_loopbreak #define event_base_loopexit gl_event_base_loopexit #define event_base_new gl_event_base_new #define event_base_new_with_config gl_event_base_new_with_config #define event_base_once gl_event_base_once #define event_base_priority_init gl_event_base_priority_init #define event_base_set gl_event_base_set #define event_base_start_iocp gl_event_base_start_iocp #define event_base_stop_iocp gl_event_base_stop_iocp #define event_config_avoid_method gl_event_config_avoid_method #define event_config_free gl_event_config_free #define event_config_new gl_event_config_new #define event_config_require_features gl_event_config_require_features #define event_config_set_flag gl_event_config_set_flag #define event_config_set_num_cpus_hint gl_event_config_set_num_cpus_hint #define event_debug_map_HT_CLEAR gl_event_debug_map_HT_CLEAR #define 
event_debug_map_HT_GROW gl_event_debug_map_HT_GROW #define _event_debug_map_HT_REP_IS_BAD gl__event_debug_map_HT_REP_IS_BAD #define _event_debug_mode_on gl__event_debug_mode_on #define event_debug_unassign gl_event_debug_unassign #define event_deferred_cb_cancel gl_event_deferred_cb_cancel #define event_deferred_cb_init gl_event_deferred_cb_init #define event_deferred_cb_queue_init gl_event_deferred_cb_queue_init #define event_deferred_cb_schedule gl_event_deferred_cb_schedule #define event_del gl_event_del #define event_dispatch gl_event_dispatch #define event_enable_debug_mode gl_event_enable_debug_mode #define event_free gl_event_free #define event_get_assignment gl_event_get_assignment #define event_get_base gl_event_get_base #define event_get_callback gl_event_get_callback #define event_get_callback_arg gl_event_get_callback_arg #define event_get_events gl_event_get_events #define event_get_fd gl_event_get_fd #define event_get_method gl_event_get_method #define event_get_struct_event_size gl_event_get_struct_event_size #define event_get_supported_methods gl_event_get_supported_methods #define event_get_version gl_event_get_version #define event_get_version_number gl_event_get_version_number #define event_global_current_base_ gl_event_global_current_base_ #define event_global_setup_locks_ gl_event_global_setup_locks_ #define event_init gl_event_init #define event_initialized gl_event_initialized #define event_loop gl_event_loop #define event_loopbreak gl_event_loopbreak #define event_loopexit gl_event_loopexit #define event_mm_calloc_ gl_event_mm_calloc_ #define event_mm_free_ gl_event_mm_free_ #define event_mm_malloc_ gl_event_mm_malloc_ #define event_mm_realloc_ gl_event_mm_realloc_ #define event_mm_strdup_ gl_event_mm_strdup_ #define event_new gl_event_new #define event_once gl_event_once #define event_pending gl_event_pending #define event_priority_init gl_event_priority_init #define event_priority_set gl_event_priority_set #define event_reinit 
gl_event_reinit #define event_set gl_event_set #define event_set_mem_functions gl_event_set_mem_functions #define evthread_make_base_notifiable gl_evthread_make_base_notifiable #define _evthread_cond_fns gl__evthread_cond_fns #define _evthread_debug_get_real_lock gl__evthread_debug_get_real_lock #define evthread_enable_lock_debuging gl_evthread_enable_lock_debuging #define _evthread_id_fn gl__evthread_id_fn #define _evthread_is_debug_lock_held gl__evthread_is_debug_lock_held #define _evthread_lock_debugging_enabled gl__evthread_lock_debugging_enabled #define _evthread_lock_fns gl__evthread_lock_fns #define evthread_set_condition_callbacks gl_evthread_set_condition_callbacks #define evthread_set_id_callback gl_evthread_set_id_callback #define evthread_set_lock_callbacks gl_evthread_set_lock_callbacks #define evthread_setup_global_lock_ gl_evthread_setup_global_lock_ #define evbuffer_add gl_evbuffer_add #define evbuffer_add_buffer gl_evbuffer_add_buffer #define evbuffer_add_cb gl_evbuffer_add_cb #define evbuffer_add_file gl_evbuffer_add_file #define evbuffer_add_printf gl_evbuffer_add_printf #define evbuffer_add_reference gl_evbuffer_add_reference #define evbuffer_add_vprintf gl_evbuffer_add_vprintf #define evbuffer_cb_clear_flags gl_evbuffer_cb_clear_flags #define evbuffer_cb_set_flags gl_evbuffer_cb_set_flags #define _evbuffer_chain_pin gl__evbuffer_chain_pin #define _evbuffer_chain_unpin gl__evbuffer_chain_unpin #define evbuffer_clear_flags gl_evbuffer_clear_flags #define evbuffer_commit_space gl_evbuffer_commit_space #define evbuffer_copyout gl_evbuffer_copyout #define _evbuffer_decref_and_unlock gl__evbuffer_decref_and_unlock #define evbuffer_defer_callbacks gl_evbuffer_defer_callbacks #define evbuffer_drain gl_evbuffer_drain #define evbuffer_enable_locking gl_evbuffer_enable_locking #define evbuffer_expand gl_evbuffer_expand #define _evbuffer_expand_fast gl__evbuffer_expand_fast #define evbuffer_find gl_evbuffer_find #define evbuffer_free gl_evbuffer_free 
#define evbuffer_freeze gl_evbuffer_freeze #define evbuffer_get_contiguous_space gl_evbuffer_get_contiguous_space #define evbuffer_get_length gl_evbuffer_get_length #define _evbuffer_incref gl__evbuffer_incref #define _evbuffer_incref_and_lock gl__evbuffer_incref_and_lock #define evbuffer_invoke_callbacks gl_evbuffer_invoke_callbacks #define evbuffer_lock gl_evbuffer_lock #define evbuffer_new gl_evbuffer_new #define evbuffer_peek gl_evbuffer_peek #define evbuffer_prepend gl_evbuffer_prepend #define evbuffer_prepend_buffer gl_evbuffer_prepend_buffer #define evbuffer_ptr_set gl_evbuffer_ptr_set #define evbuffer_pullup gl_evbuffer_pullup #define evbuffer_read gl_evbuffer_read #define evbuffer_readline gl_evbuffer_readline #define evbuffer_readln gl_evbuffer_readln #define _evbuffer_read_setup_vecs gl__evbuffer_read_setup_vecs #define evbuffer_remove gl_evbuffer_remove #define evbuffer_remove_buffer gl_evbuffer_remove_buffer #define evbuffer_remove_cb gl_evbuffer_remove_cb #define evbuffer_remove_cb_entry gl_evbuffer_remove_cb_entry #define evbuffer_reserve_space gl_evbuffer_reserve_space #define evbuffer_search gl_evbuffer_search #define evbuffer_search_eol gl_evbuffer_search_eol #define evbuffer_search_range gl_evbuffer_search_range #define evbuffer_setcb gl_evbuffer_setcb #define evbuffer_set_flags gl_evbuffer_set_flags #define evbuffer_set_parent gl_evbuffer_set_parent #define _evbuffer_testing_use_linear_file_access gl__evbuffer_testing_use_linear_file_access #define _evbuffer_testing_use_mmap gl__evbuffer_testing_use_mmap #define _evbuffer_testing_use_sendfile gl__evbuffer_testing_use_sendfile #define evbuffer_unfreeze gl_evbuffer_unfreeze #define evbuffer_unlock gl_evbuffer_unlock #define evbuffer_write gl_evbuffer_write #define evbuffer_write_atmost gl_evbuffer_write_atmost #define _bufferevent_add_event gl__bufferevent_add_event #define bufferevent_decref gl_bufferevent_decref #define _bufferevent_decref_and_unlock gl__bufferevent_decref_and_unlock #define 
_bufferevent_del_generic_timeout_cbs gl__bufferevent_del_generic_timeout_cbs #define bufferevent_disable gl_bufferevent_disable #define bufferevent_disable_hard gl_bufferevent_disable_hard #define bufferevent_enable gl_bufferevent_enable #define bufferevent_enable_locking gl_bufferevent_enable_locking #define bufferevent_flush gl_bufferevent_flush #define bufferevent_free gl_bufferevent_free #define _bufferevent_generic_adj_timeouts gl__bufferevent_generic_adj_timeouts #define bufferevent_get_base gl_bufferevent_get_base #define bufferevent_get_enabled gl_bufferevent_get_enabled #define bufferevent_getfd gl_bufferevent_getfd #define bufferevent_get_input gl_bufferevent_get_input #define bufferevent_get_output gl_bufferevent_get_output #define bufferevent_get_underlying gl_bufferevent_get_underlying #define bufferevent_incref gl_bufferevent_incref #define _bufferevent_incref_and_lock gl__bufferevent_incref_and_lock #define bufferevent_init_common gl_bufferevent_init_common #define _bufferevent_init_generic_timeout_cbs gl__bufferevent_init_generic_timeout_cbs #define bufferevent_lock gl_bufferevent_lock #define bufferevent_read gl_bufferevent_read #define bufferevent_read_buffer gl_bufferevent_read_buffer #define _bufferevent_run_eventcb gl__bufferevent_run_eventcb #define _bufferevent_run_readcb gl__bufferevent_run_readcb #define _bufferevent_run_writecb gl__bufferevent_run_writecb #define bufferevent_setcb gl_bufferevent_setcb #define bufferevent_setfd gl_bufferevent_setfd #define bufferevent_settimeout gl_bufferevent_settimeout #define bufferevent_set_timeouts gl_bufferevent_set_timeouts #define bufferevent_setwatermark gl_bufferevent_setwatermark #define bufferevent_suspend_read gl_bufferevent_suspend_read #define bufferevent_suspend_write gl_bufferevent_suspend_write #define bufferevent_unlock gl_bufferevent_unlock #define bufferevent_unsuspend_read gl_bufferevent_unsuspend_read #define bufferevent_unsuspend_write gl_bufferevent_unsuspend_write #define 
bufferevent_write gl_bufferevent_write #define bufferevent_write_buffer gl_bufferevent_write_buffer #define bufferevent_base_set gl_bufferevent_base_set #define bufferevent_new gl_bufferevent_new #define bufferevent_ops_socket gl_bufferevent_ops_socket #define bufferevent_priority_set gl_bufferevent_priority_set #define bufferevent_socket_connect gl_bufferevent_socket_connect #define bufferevent_socket_connect_hostname gl_bufferevent_socket_connect_hostname #define bufferevent_socket_get_dns_error gl_bufferevent_socket_get_dns_error #define bufferevent_socket_new gl_bufferevent_socket_new #define bufferevent_filter_new gl_bufferevent_filter_new #define bufferevent_ops_filter gl_bufferevent_ops_filter #define bufferevent_ops_pair gl_bufferevent_ops_pair #define bufferevent_pair_get_partner gl_bufferevent_pair_get_partner #define bufferevent_pair_new gl_bufferevent_pair_new #define evconnlistener_disable gl_evconnlistener_disable #define evconnlistener_enable gl_evconnlistener_enable #define evconnlistener_free gl_evconnlistener_free #define evconnlistener_get_base gl_evconnlistener_get_base #define evconnlistener_get_fd gl_evconnlistener_get_fd #define evconnlistener_new gl_evconnlistener_new #define evconnlistener_new_bind gl_evconnlistener_new_bind #define evconnlistener_set_cb gl_evconnlistener_set_cb #define evconnlistener_set_error_cb gl_evconnlistener_set_error_cb #define bufferevent_add_to_rate_limit_group gl_bufferevent_add_to_rate_limit_group #define _bufferevent_decrement_read_buckets gl__bufferevent_decrement_read_buckets #define bufferevent_decrement_read_limit gl_bufferevent_decrement_read_limit #define _bufferevent_decrement_write_buckets gl__bufferevent_decrement_write_buckets #define bufferevent_decrement_write_limit gl_bufferevent_decrement_write_limit #define bufferevent_get_max_to_read gl_bufferevent_get_max_to_read #define bufferevent_get_max_to_write gl_bufferevent_get_max_to_write #define bufferevent_get_read_limit gl_bufferevent_get_read_limit 
#define _bufferevent_get_read_max gl__bufferevent_get_read_max #define bufferevent_get_write_limit gl_bufferevent_get_write_limit #define _bufferevent_get_write_max gl__bufferevent_get_write_max #define bufferevent_rate_limit_group_decrement_read gl_bufferevent_rate_limit_group_decrement_read #define bufferevent_rate_limit_group_decrement_write gl_bufferevent_rate_limit_group_decrement_write #define bufferevent_rate_limit_group_free gl_bufferevent_rate_limit_group_free #define bufferevent_rate_limit_group_get_read_limit gl_bufferevent_rate_limit_group_get_read_limit #define bufferevent_rate_limit_group_get_totals gl_bufferevent_rate_limit_group_get_totals #define bufferevent_rate_limit_group_get_write_limit gl_bufferevent_rate_limit_group_get_write_limit #define bufferevent_rate_limit_group_new gl_bufferevent_rate_limit_group_new #define bufferevent_rate_limit_group_reset_totals gl_bufferevent_rate_limit_group_reset_totals #define bufferevent_rate_limit_group_set_cfg gl_bufferevent_rate_limit_group_set_cfg #define bufferevent_rate_limit_group_set_min_share gl_bufferevent_rate_limit_group_set_min_share #define bufferevent_remove_from_rate_limit_group gl_bufferevent_remove_from_rate_limit_group #define bufferevent_remove_from_rate_limit_group_internal gl_bufferevent_remove_from_rate_limit_group_internal #define bufferevent_set_rate_limit gl_bufferevent_set_rate_limit #define ev_token_bucket_cfg_free gl_ev_token_bucket_cfg_free #define ev_token_bucket_cfg_new gl_ev_token_bucket_cfg_new #define ev_token_bucket_get_tick gl_ev_token_bucket_get_tick #define ev_token_bucket_init gl_ev_token_bucket_init #define ev_token_bucket_update gl_ev_token_bucket_update #define event_changelist_add gl_event_changelist_add #define event_changelist_del gl_event_changelist_del #define event_changelist_freemem gl_event_changelist_freemem #define event_changelist_init gl_event_changelist_init #define event_changelist_remove_all gl_event_changelist_remove_all #define evmap_check_integrity 
gl_evmap_check_integrity #define evmap_io_active gl_evmap_io_active #define evmap_io_add gl_evmap_io_add #define evmap_io_clear gl_evmap_io_clear #define evmap_io_del gl_evmap_io_del #define evmap_io_get_fdinfo gl_evmap_io_get_fdinfo #define evmap_io_initmap gl_evmap_io_initmap #define evmap_signal_active gl_evmap_signal_active #define evmap_signal_add gl_evmap_signal_add #define evmap_signal_clear gl_evmap_signal_clear #define evmap_signal_del gl_evmap_signal_del #define evmap_signal_initmap gl_evmap_signal_initmap #define _event_debugx gl__event_debugx #define event_err gl_event_err #define event_errx gl_event_errx #define event_msgx gl_event_msgx #define event_set_fatal_callback gl_event_set_fatal_callback #define event_set_log_callback gl_event_set_log_callback #define event_sock_err gl_event_sock_err #define event_sock_warn gl_event_sock_warn #define event_warn gl_event_warn #define event_warnx gl_event_warnx #define evutil_addrinfo_append gl_evutil_addrinfo_append #define evutil_adjust_hints_for_addrconfig gl_evutil_adjust_hints_for_addrconfig #define evutil_ascii_strcasecmp gl_evutil_ascii_strcasecmp #define evutil_ascii_strncasecmp gl_evutil_ascii_strncasecmp #define evutil_closesocket gl_evutil_closesocket #define evutil_ersatz_socketpair gl_evutil_ersatz_socketpair #define evutil_format_sockaddr_port gl_evutil_format_sockaddr_port #define evutil_freeaddrinfo gl_evutil_freeaddrinfo #define evutil_gai_strerror gl_evutil_gai_strerror #define evutil_getaddrinfo gl_evutil_getaddrinfo #define evutil_getaddrinfo_async gl_evutil_getaddrinfo_async #define evutil_getaddrinfo_common gl_evutil_getaddrinfo_common #define evutil_getenv gl_evutil_getenv #define evutil_hex_char_to_int gl_evutil_hex_char_to_int #define evutil_inet_ntop gl_evutil_inet_ntop #define evutil_inet_pton gl_evutil_inet_pton #define EVUTIL_ISALNUM gl_EVUTIL_ISALNUM #define EVUTIL_ISALPHA gl_EVUTIL_ISALPHA #define EVUTIL_ISDIGIT gl_EVUTIL_ISDIGIT #define EVUTIL_ISLOWER gl_EVUTIL_ISLOWER #define 
EVUTIL_ISPRINT gl_EVUTIL_ISPRINT #define EVUTIL_ISSPACE gl_EVUTIL_ISSPACE #define EVUTIL_ISUPPER gl_EVUTIL_ISUPPER #define EVUTIL_ISXDIGIT gl_EVUTIL_ISXDIGIT #define evutil_make_listen_socket_reuseable gl_evutil_make_listen_socket_reuseable #define evutil_make_socket_closeonexec gl_evutil_make_socket_closeonexec #define evutil_make_socket_nonblocking gl_evutil_make_socket_nonblocking #define evutil_new_addrinfo gl_evutil_new_addrinfo #define evutil_open_closeonexec gl_evutil_open_closeonexec #define evutil_parse_sockaddr_port gl_evutil_parse_sockaddr_port #define evutil_read_file gl_evutil_read_file #define evutil_set_evdns_getaddrinfo_fn gl_evutil_set_evdns_getaddrinfo_fn #define evutil_snprintf gl_evutil_snprintf #define evutil_sockaddr_cmp gl_evutil_sockaddr_cmp #define evutil_sockaddr_is_loopback gl_evutil_sockaddr_is_loopback #define evutil_socket_connect gl_evutil_socket_connect #define evutil_socket_finished_connecting gl_evutil_socket_finished_connecting #define evutil_socketpair gl_evutil_socketpair #define evutil_strtoll gl_evutil_strtoll #define EVUTIL_TOLOWER gl_EVUTIL_TOLOWER #define EVUTIL_TOUPPER gl_EVUTIL_TOUPPER #define evutil_tv_to_msec gl_evutil_tv_to_msec #define evutil_vsnprintf gl_evutil_vsnprintf #define _evutil_weakrand gl__evutil_weakrand #define evutil_secure_rng_add_bytes gl_evutil_secure_rng_add_bytes #define evutil_secure_rng_get_bytes gl_evutil_secure_rng_get_bytes #define evutil_secure_rng_global_setup_locks_ gl_evutil_secure_rng_global_setup_locks_ #define evutil_secure_rng_init gl_evutil_secure_rng_init #define _event_strlcpy gl__event_strlcpy #define selectops gl_selectops #define pollops gl_pollops #define epollops gl_epollops #define evsig_dealloc gl_evsig_dealloc #define evsig_global_setup_locks_ gl_evsig_global_setup_locks_ #define evsig_init gl_evsig_init #define _evsig_restore_handler gl__evsig_restore_handler #define evsig_set_base gl_evsig_set_base #define _evsig_set_handler gl__evsig_set_handler #define evtag_consume 
gl_evtag_consume #define evtag_decode_int gl_evtag_decode_int #define evtag_decode_int64 gl_evtag_decode_int64 #define evtag_decode_tag gl_evtag_decode_tag #define evtag_encode_int gl_evtag_encode_int #define evtag_encode_int64 gl_evtag_encode_int64 #define evtag_encode_tag gl_evtag_encode_tag #define evtag_init gl_evtag_init #define evtag_marshal gl_evtag_marshal #define evtag_marshal_buffer gl_evtag_marshal_buffer #define evtag_marshal_int gl_evtag_marshal_int #define evtag_marshal_int64 gl_evtag_marshal_int64 #define evtag_marshal_string gl_evtag_marshal_string #define evtag_marshal_timeval gl_evtag_marshal_timeval #define evtag_payload_length gl_evtag_payload_length #define evtag_peek gl_evtag_peek #define evtag_peek_length gl_evtag_peek_length #define evtag_unmarshal gl_evtag_unmarshal #define evtag_unmarshal_fixed gl_evtag_unmarshal_fixed #define evtag_unmarshal_header gl_evtag_unmarshal_header #define evtag_unmarshal_int gl_evtag_unmarshal_int #define evtag_unmarshal_int64 gl_evtag_unmarshal_int64 #define evtag_unmarshal_string gl_evtag_unmarshal_string #define evtag_unmarshal_timeval gl_evtag_unmarshal_timeval #define evhttp_accept_socket gl_evhttp_accept_socket #define evhttp_accept_socket_with_handle gl_evhttp_accept_socket_with_handle #define evhttp_add_header gl_evhttp_add_header #define evhttp_add_server_alias gl_evhttp_add_server_alias #define evhttp_add_virtual_host gl_evhttp_add_virtual_host #define evhttp_bind_listener gl_evhttp_bind_listener #define evhttp_bind_socket gl_evhttp_bind_socket #define evhttp_bind_socket_with_handle gl_evhttp_bind_socket_with_handle #define evhttp_bound_socket_get_fd gl_evhttp_bound_socket_get_fd #define evhttp_bound_socket_get_listener gl_evhttp_bound_socket_get_listener #define evhttp_cancel_request gl_evhttp_cancel_request #define evhttp_clear_headers gl_evhttp_clear_headers #define evhttp_connection_base_new gl_evhttp_connection_base_new #define evhttp_connection_connect gl_evhttp_connection_connect #define 
evhttp_connection_fail gl_evhttp_connection_fail #define evhttp_connection_free gl_evhttp_connection_free #define evhttp_connection_get_base gl_evhttp_connection_get_base #define evhttp_connection_get_bufferevent gl_evhttp_connection_get_bufferevent #define evhttp_connection_get_peer gl_evhttp_connection_get_peer #define evhttp_connection_new gl_evhttp_connection_new #define evhttp_connection_reset gl_evhttp_connection_reset #define evhttp_connection_set_base gl_evhttp_connection_set_base #define evhttp_connection_set_closecb gl_evhttp_connection_set_closecb #define evhttp_connection_set_local_address gl_evhttp_connection_set_local_address #define evhttp_connection_set_local_port gl_evhttp_connection_set_local_port #define evhttp_connection_set_max_body_size gl_evhttp_connection_set_max_body_size #define evhttp_connection_set_max_headers_size gl_evhttp_connection_set_max_headers_size #define evhttp_connection_set_retries gl_evhttp_connection_set_retries #define evhttp_connection_set_timeout gl_evhttp_connection_set_timeout #define evhttp_decode_uri gl_evhttp_decode_uri #define evhttp_del_accept_socket gl_evhttp_del_accept_socket #define evhttp_del_cb gl_evhttp_del_cb #define evhttp_encode_uri gl_evhttp_encode_uri #define evhttp_find_header gl_evhttp_find_header #define evhttp_free gl_evhttp_free #define evhttp_htmlescape gl_evhttp_htmlescape #define evhttp_make_request gl_evhttp_make_request #define evhttp_new gl_evhttp_new #define evhttp_parse_firstline gl_evhttp_parse_firstline #define evhttp_parse_headers gl_evhttp_parse_headers #define evhttp_parse_query gl_evhttp_parse_query #define evhttp_parse_query_str gl_evhttp_parse_query_str #define evhttp_remove_header gl_evhttp_remove_header #define evhttp_remove_server_alias gl_evhttp_remove_server_alias #define evhttp_remove_virtual_host gl_evhttp_remove_virtual_host #define evhttp_request_free gl_evhttp_request_free #define evhttp_request_get_command gl_evhttp_request_get_command #define 
evhttp_request_get_connection gl_evhttp_request_get_connection #define evhttp_request_get_evhttp_uri gl_evhttp_request_get_evhttp_uri #define evhttp_request_get_host gl_evhttp_request_get_host #define evhttp_request_get_input_buffer gl_evhttp_request_get_input_buffer #define evhttp_request_get_input_headers gl_evhttp_request_get_input_headers #define evhttp_request_get_output_buffer gl_evhttp_request_get_output_buffer #define evhttp_request_get_output_headers gl_evhttp_request_get_output_headers #define evhttp_request_get_response_code gl_evhttp_request_get_response_code #define evhttp_request_get_uri gl_evhttp_request_get_uri #define evhttp_request_is_owned gl_evhttp_request_is_owned #define evhttp_request_new gl_evhttp_request_new #define evhttp_request_own gl_evhttp_request_own #define evhttp_request_set_chunked_cb gl_evhttp_request_set_chunked_cb #define evhttp_response_code gl_evhttp_response_code #define evhttp_send_error gl_evhttp_send_error #define evhttp_send_page gl_evhttp_send_page #define evhttp_send_reply gl_evhttp_send_reply #define evhttp_send_reply_chunk gl_evhttp_send_reply_chunk #define evhttp_send_reply_end gl_evhttp_send_reply_end #define evhttp_send_reply_start gl_evhttp_send_reply_start #define evhttp_set_allowed_methods gl_evhttp_set_allowed_methods #define evhttp_set_cb gl_evhttp_set_cb #define evhttp_set_gencb gl_evhttp_set_gencb #define evhttp_set_max_body_size gl_evhttp_set_max_body_size #define evhttp_set_max_headers_size gl_evhttp_set_max_headers_size #define evhttp_set_timeout gl_evhttp_set_timeout #define evhttp_start gl_evhttp_start #define evhttp_start_read gl_evhttp_start_read #define evhttp_uridecode gl_evhttp_uridecode #define evhttp_uriencode gl_evhttp_uriencode #define evhttp_uri_free gl_evhttp_uri_free #define evhttp_uri_get_fragment gl_evhttp_uri_get_fragment #define evhttp_uri_get_host gl_evhttp_uri_get_host #define evhttp_uri_get_path gl_evhttp_uri_get_path #define evhttp_uri_get_port gl_evhttp_uri_get_port #define 
evhttp_uri_get_query gl_evhttp_uri_get_query #define evhttp_uri_get_scheme gl_evhttp_uri_get_scheme #define evhttp_uri_get_userinfo gl_evhttp_uri_get_userinfo #define evhttp_uri_join gl_evhttp_uri_join #define evhttp_uri_new gl_evhttp_uri_new #define evhttp_uri_parse gl_evhttp_uri_parse #define evhttp_uri_parse_with_flags gl_evhttp_uri_parse_with_flags #define evhttp_uri_set_flags gl_evhttp_uri_set_flags #define evhttp_uri_set_fragment gl_evhttp_uri_set_fragment #define evhttp_uri_set_host gl_evhttp_uri_set_host #define evhttp_uri_set_path gl_evhttp_uri_set_path #define evhttp_uri_set_port gl_evhttp_uri_set_port #define evhttp_uri_set_query gl_evhttp_uri_set_query #define evhttp_uri_set_scheme gl_evhttp_uri_set_scheme #define evhttp_uri_set_userinfo gl_evhttp_uri_set_userinfo #define evdns_add_server_port gl_evdns_add_server_port #define evdns_add_server_port_with_base gl_evdns_add_server_port_with_base #define evdns_base_clear_nameservers_and_suspend gl_evdns_base_clear_nameservers_and_suspend #define evdns_base_count_nameservers gl_evdns_base_count_nameservers #define evdns_base_free gl_evdns_base_free #define evdns_base_load_hosts gl_evdns_base_load_hosts #define evdns_base_nameserver_add gl_evdns_base_nameserver_add #define evdns_base_nameserver_ip_add gl_evdns_base_nameserver_ip_add #define evdns_base_nameserver_sockaddr_add gl_evdns_base_nameserver_sockaddr_add #define evdns_base_new gl_evdns_base_new #define evdns_base_resolv_conf_parse gl_evdns_base_resolv_conf_parse #define evdns_base_resolve_ipv4 gl_evdns_base_resolve_ipv4 #define evdns_base_resolve_ipv6 gl_evdns_base_resolve_ipv6 #define evdns_base_resolve_reverse gl_evdns_base_resolve_reverse #define evdns_base_resolve_reverse_ipv6 gl_evdns_base_resolve_reverse_ipv6 #define evdns_base_resume gl_evdns_base_resume #define evdns_base_search_add gl_evdns_base_search_add #define evdns_base_search_clear gl_evdns_base_search_clear #define evdns_base_search_ndots_set gl_evdns_base_search_ndots_set #define 
evdns_base_set_option gl_evdns_base_set_option #define evdns_cancel_request gl_evdns_cancel_request #define evdns_clear_nameservers_and_suspend gl_evdns_clear_nameservers_and_suspend #define evdns_close_server_port gl_evdns_close_server_port #define evdns_count_nameservers gl_evdns_count_nameservers #define evdns_err_to_string gl_evdns_err_to_string #define evdns_getaddrinfo gl_evdns_getaddrinfo #define evdns_getaddrinfo_cancel gl_evdns_getaddrinfo_cancel #define evdns_get_global_base gl_evdns_get_global_base #define evdns_init gl_evdns_init #define evdns_nameserver_add gl_evdns_nameserver_add #define evdns_nameserver_ip_add gl_evdns_nameserver_ip_add #define evdns_resolv_conf_parse gl_evdns_resolv_conf_parse #define evdns_resolve_ipv4 gl_evdns_resolve_ipv4 #define evdns_resolve_ipv6 gl_evdns_resolve_ipv6 #define evdns_resolve_reverse gl_evdns_resolve_reverse #define evdns_resolve_reverse_ipv6 gl_evdns_resolve_reverse_ipv6 #define evdns_resume gl_evdns_resume #define evdns_search_add gl_evdns_search_add #define evdns_search_clear gl_evdns_search_clear #define evdns_search_ndots_set gl_evdns_search_ndots_set #define evdns_server_request_add_aaaa_reply gl_evdns_server_request_add_aaaa_reply #define evdns_server_request_add_a_reply gl_evdns_server_request_add_a_reply #define evdns_server_request_add_cname_reply gl_evdns_server_request_add_cname_reply #define evdns_server_request_add_ptr_reply gl_evdns_server_request_add_ptr_reply #define evdns_server_request_add_reply gl_evdns_server_request_add_reply #define evdns_server_request_drop gl_evdns_server_request_drop #define evdns_server_request_get_requesting_addr gl_evdns_server_request_get_requesting_addr #define evdns_server_request_respond gl_evdns_server_request_respond #define evdns_server_request_set_flags gl_evdns_server_request_set_flags #define evdns_set_log_fn gl_evdns_set_log_fn #define evdns_set_option gl_evdns_set_option #define evdns_set_random_bytes_fn gl_evdns_set_random_bytes_fn #define 
evdns_set_transaction_id_fn gl_evdns_set_transaction_id_fn #define evdns_shutdown gl_evdns_shutdown #define evrpc_add_hook gl_evrpc_add_hook #define evrpc_free gl_evrpc_free #define evrpc_get_reply gl_evrpc_get_reply #define evrpc_get_request gl_evrpc_get_request #define evrpc_hook_add_meta gl_evrpc_hook_add_meta #define evrpc_hook_find_meta gl_evrpc_hook_find_meta #define evrpc_hook_get_connection gl_evrpc_hook_get_connection #define evrpc_init gl_evrpc_init #define evrpc_make_request gl_evrpc_make_request #define evrpc_make_request_ctx gl_evrpc_make_request_ctx #define evrpc_pool_add_connection gl_evrpc_pool_add_connection #define evrpc_pool_free gl_evrpc_pool_free #define evrpc_pool_new gl_evrpc_pool_new #define evrpc_pool_remove_connection gl_evrpc_pool_remove_connection #define evrpc_pool_set_timeout gl_evrpc_pool_set_timeout #define evrpc_register_generic gl_evrpc_register_generic #define evrpc_register_rpc gl_evrpc_register_rpc #define evrpc_remove_hook gl_evrpc_remove_hook #define evrpc_reqstate_free gl_evrpc_reqstate_free #define evrpc_request_done gl_evrpc_request_done #define evrpc_request_get_pool gl_evrpc_request_get_pool #define evrpc_request_set_cb gl_evrpc_request_set_cb #define evrpc_request_set_pool gl_evrpc_request_set_pool #define evrpc_resume_request gl_evrpc_resume_request #define evrpc_send_request_generic gl_evrpc_send_request_generic #define evrpc_unregister_rpc gl_evrpc_unregister_rpc ================================================ FILE: src/graphlab/rpc/evwrapundef.h ================================================ #undef event_active #undef event_active_nolock #undef event_add #undef event_assign #undef event_base_add_virtual #undef event_base_assert_ok #undef event_base_del_virtual #undef event_base_dispatch #undef event_base_dump_events #undef event_base_free #undef event_base_get_deferred_cb_queue #undef event_base_get_features #undef event_base_get_method #undef event_base_gettimeofday_cached #undef event_base_got_break #undef 
event_base_got_exit #undef event_base_init_common_timeout #undef event_base_loop #undef event_base_loopbreak #undef event_base_loopexit #undef event_base_new #undef event_base_new_with_config #undef event_base_once #undef event_base_priority_init #undef event_base_set #undef event_base_start_iocp #undef event_base_stop_iocp #undef event_config_avoid_method #undef event_config_free #undef event_config_new #undef event_config_require_features #undef event_config_set_flag #undef event_config_set_num_cpus_hint #undef event_debug_map_HT_CLEAR #undef event_debug_map_HT_GROW #undef _event_debug_map_HT_REP_IS_BAD #undef _event_debug_mode_on #undef event_debug_unassign #undef event_deferred_cb_cancel #undef event_deferred_cb_init #undef event_deferred_cb_queue_init #undef event_deferred_cb_schedule #undef event_del #undef event_dispatch #undef event_enable_debug_mode #undef event_free #undef event_get_assignment #undef event_get_base #undef event_get_callback #undef event_get_callback_arg #undef event_get_events #undef event_get_fd #undef event_get_method #undef event_get_struct_event_size #undef event_get_supported_methods #undef event_get_version #undef event_get_version_number #undef event_global_current_base_ #undef event_global_setup_locks_ #undef event_init #undef event_initialized #undef event_loop #undef event_loopbreak #undef event_loopexit #undef event_mm_calloc_ #undef event_mm_free_ #undef event_mm_malloc_ #undef event_mm_realloc_ #undef event_mm_strdup_ #undef event_new #undef event_once #undef event_pending #undef event_priority_init #undef event_priority_set #undef event_reinit #undef event_set #undef event_set_mem_functions #undef evthread_make_base_notifiable #undef _evthread_cond_fns #undef _evthread_debug_get_real_lock #undef evthread_enable_lock_debuging #undef _evthread_id_fn #undef _evthread_is_debug_lock_held #undef _evthread_lock_debugging_enabled #undef _evthread_lock_fns #undef evthread_set_condition_callbacks #undef evthread_set_id_callback #undef 
evthread_set_lock_callbacks #undef evthread_setup_global_lock_ #undef evbuffer_add #undef evbuffer_add_buffer #undef evbuffer_add_cb #undef evbuffer_add_file #undef evbuffer_add_printf #undef evbuffer_add_reference #undef evbuffer_add_vprintf #undef evbuffer_cb_clear_flags #undef evbuffer_cb_set_flags #undef _evbuffer_chain_pin #undef _evbuffer_chain_unpin #undef evbuffer_clear_flags #undef evbuffer_commit_space #undef evbuffer_copyout #undef _evbuffer_decref_and_unlock #undef evbuffer_defer_callbacks #undef evbuffer_drain #undef evbuffer_enable_locking #undef evbuffer_expand #undef _evbuffer_expand_fast #undef evbuffer_find #undef evbuffer_free #undef evbuffer_freeze #undef evbuffer_get_contiguous_space #undef evbuffer_get_length #undef _evbuffer_incref #undef _evbuffer_incref_and_lock #undef evbuffer_invoke_callbacks #undef evbuffer_lock #undef evbuffer_new #undef evbuffer_peek #undef evbuffer_prepend #undef evbuffer_prepend_buffer #undef evbuffer_ptr_set #undef evbuffer_pullup #undef evbuffer_read #undef evbuffer_readline #undef evbuffer_readln #undef _evbuffer_read_setup_vecs #undef evbuffer_remove #undef evbuffer_remove_buffer #undef evbuffer_remove_cb #undef evbuffer_remove_cb_entry #undef evbuffer_reserve_space #undef evbuffer_search #undef evbuffer_search_eol #undef evbuffer_search_range #undef evbuffer_setcb #undef evbuffer_set_flags #undef evbuffer_set_parent #undef _evbuffer_testing_use_linear_file_access #undef _evbuffer_testing_use_mmap #undef _evbuffer_testing_use_sendfile #undef evbuffer_unfreeze #undef evbuffer_unlock #undef evbuffer_write #undef evbuffer_write_atmost #undef _bufferevent_add_event #undef bufferevent_decref #undef _bufferevent_decref_and_unlock #undef _bufferevent_del_generic_timeout_cbs #undef bufferevent_disable #undef bufferevent_disable_hard #undef bufferevent_enable #undef bufferevent_enable_locking #undef bufferevent_flush #undef bufferevent_free #undef _bufferevent_generic_adj_timeouts #undef bufferevent_get_base #undef 
bufferevent_get_enabled #undef bufferevent_getfd #undef bufferevent_get_input #undef bufferevent_get_output #undef bufferevent_get_underlying #undef bufferevent_incref #undef _bufferevent_incref_and_lock #undef bufferevent_init_common #undef _bufferevent_init_generic_timeout_cbs #undef bufferevent_lock #undef bufferevent_read #undef bufferevent_read_buffer #undef _bufferevent_run_eventcb #undef _bufferevent_run_readcb #undef _bufferevent_run_writecb #undef bufferevent_setcb #undef bufferevent_setfd #undef bufferevent_settimeout #undef bufferevent_set_timeouts #undef bufferevent_setwatermark #undef bufferevent_suspend_read #undef bufferevent_suspend_write #undef bufferevent_unlock #undef bufferevent_unsuspend_read #undef bufferevent_unsuspend_write #undef bufferevent_write #undef bufferevent_write_buffer #undef bufferevent_base_set #undef bufferevent_new #undef bufferevent_ops_socket #undef bufferevent_priority_set #undef bufferevent_socket_connect #undef bufferevent_socket_connect_hostname #undef bufferevent_socket_get_dns_error #undef bufferevent_socket_new #undef bufferevent_filter_new #undef bufferevent_ops_filter #undef bufferevent_ops_pair #undef bufferevent_pair_get_partner #undef bufferevent_pair_new #undef evconnlistener_disable #undef evconnlistener_enable #undef evconnlistener_free #undef evconnlistener_get_base #undef evconnlistener_get_fd #undef evconnlistener_new #undef evconnlistener_new_bind #undef evconnlistener_set_cb #undef evconnlistener_set_error_cb #undef bufferevent_add_to_rate_limit_group #undef _bufferevent_decrement_read_buckets #undef bufferevent_decrement_read_limit #undef _bufferevent_decrement_write_buckets #undef bufferevent_decrement_write_limit #undef bufferevent_get_max_to_read #undef bufferevent_get_max_to_write #undef bufferevent_get_read_limit #undef _bufferevent_get_read_max #undef bufferevent_get_write_limit #undef _bufferevent_get_write_max #undef bufferevent_rate_limit_group_decrement_read #undef 
bufferevent_rate_limit_group_decrement_write #undef bufferevent_rate_limit_group_free #undef bufferevent_rate_limit_group_get_read_limit #undef bufferevent_rate_limit_group_get_totals #undef bufferevent_rate_limit_group_get_write_limit #undef bufferevent_rate_limit_group_new #undef bufferevent_rate_limit_group_reset_totals #undef bufferevent_rate_limit_group_set_cfg #undef bufferevent_rate_limit_group_set_min_share #undef bufferevent_remove_from_rate_limit_group #undef bufferevent_remove_from_rate_limit_group_internal #undef bufferevent_set_rate_limit #undef ev_token_bucket_cfg_free #undef ev_token_bucket_cfg_new #undef ev_token_bucket_get_tick #undef ev_token_bucket_init #undef ev_token_bucket_update #undef event_changelist_add #undef event_changelist_del #undef event_changelist_freemem #undef event_changelist_init #undef event_changelist_remove_all #undef evmap_check_integrity #undef evmap_io_active #undef evmap_io_add #undef evmap_io_clear #undef evmap_io_del #undef evmap_io_get_fdinfo #undef evmap_io_initmap #undef evmap_signal_active #undef evmap_signal_add #undef evmap_signal_clear #undef evmap_signal_del #undef evmap_signal_initmap #undef _event_debugx #undef event_err #undef event_errx #undef event_msgx #undef event_set_fatal_callback #undef event_set_log_callback #undef event_sock_err #undef event_sock_warn #undef event_warn #undef event_warnx #undef evutil_addrinfo_append #undef evutil_adjust_hints_for_addrconfig #undef evutil_ascii_strcasecmp #undef evutil_ascii_strncasecmp #undef evutil_closesocket #undef evutil_ersatz_socketpair #undef evutil_format_sockaddr_port #undef evutil_freeaddrinfo #undef evutil_gai_strerror #undef evutil_getaddrinfo #undef evutil_getaddrinfo_async #undef evutil_getaddrinfo_common #undef evutil_getenv #undef evutil_hex_char_to_int #undef evutil_inet_ntop #undef evutil_inet_pton #undef EVUTIL_ISALNUM #undef EVUTIL_ISALPHA #undef EVUTIL_ISDIGIT #undef EVUTIL_ISLOWER #undef EVUTIL_ISPRINT #undef EVUTIL_ISSPACE #undef 
EVUTIL_ISUPPER #undef EVUTIL_ISXDIGIT #undef evutil_make_listen_socket_reuseable #undef evutil_make_socket_closeonexec #undef evutil_make_socket_nonblocking #undef evutil_new_addrinfo #undef evutil_open_closeonexec #undef evutil_parse_sockaddr_port #undef evutil_read_file #undef evutil_set_evdns_getaddrinfo_fn #undef evutil_snprintf #undef evutil_sockaddr_cmp #undef evutil_sockaddr_is_loopback #undef evutil_socket_connect #undef evutil_socket_finished_connecting #undef evutil_socketpair #undef evutil_strtoll #undef EVUTIL_TOLOWER #undef EVUTIL_TOUPPER #undef evutil_tv_to_msec #undef evutil_vsnprintf #undef _evutil_weakrand #undef evutil_secure_rng_add_bytes #undef evutil_secure_rng_get_bytes #undef evutil_secure_rng_global_setup_locks_ #undef evutil_secure_rng_init #undef _event_strlcpy #undef selectops #undef pollops #undef epollops #undef evsig_dealloc #undef evsig_global_setup_locks_ #undef evsig_init #undef _evsig_restore_handler #undef evsig_set_base #undef _evsig_set_handler #undef evtag_consume #undef evtag_decode_int #undef evtag_decode_int64 #undef evtag_decode_tag #undef evtag_encode_int #undef evtag_encode_int64 #undef evtag_encode_tag #undef evtag_init #undef evtag_marshal #undef evtag_marshal_buffer #undef evtag_marshal_int #undef evtag_marshal_int64 #undef evtag_marshal_string #undef evtag_marshal_timeval #undef evtag_payload_length #undef evtag_peek #undef evtag_peek_length #undef evtag_unmarshal #undef evtag_unmarshal_fixed #undef evtag_unmarshal_header #undef evtag_unmarshal_int #undef evtag_unmarshal_int64 #undef evtag_unmarshal_string #undef evtag_unmarshal_timeval #undef evhttp_accept_socket #undef evhttp_accept_socket_with_handle #undef evhttp_add_header #undef evhttp_add_server_alias #undef evhttp_add_virtual_host #undef evhttp_bind_listener #undef evhttp_bind_socket #undef evhttp_bind_socket_with_handle #undef evhttp_bound_socket_get_fd #undef evhttp_bound_socket_get_listener #undef evhttp_cancel_request #undef evhttp_clear_headers #undef 
evhttp_connection_base_new #undef evhttp_connection_connect #undef evhttp_connection_fail #undef evhttp_connection_free #undef evhttp_connection_get_base #undef evhttp_connection_get_bufferevent #undef evhttp_connection_get_peer #undef evhttp_connection_new #undef evhttp_connection_reset #undef evhttp_connection_set_base #undef evhttp_connection_set_closecb #undef evhttp_connection_set_local_address #undef evhttp_connection_set_local_port #undef evhttp_connection_set_max_body_size #undef evhttp_connection_set_max_headers_size #undef evhttp_connection_set_retries #undef evhttp_connection_set_timeout #undef evhttp_decode_uri #undef evhttp_del_accept_socket #undef evhttp_del_cb #undef evhttp_encode_uri #undef evhttp_find_header #undef evhttp_free #undef evhttp_htmlescape #undef evhttp_make_request #undef evhttp_new #undef evhttp_parse_firstline #undef evhttp_parse_headers #undef evhttp_parse_query #undef evhttp_parse_query_str #undef evhttp_remove_header #undef evhttp_remove_server_alias #undef evhttp_remove_virtual_host #undef evhttp_request_free #undef evhttp_request_get_command #undef evhttp_request_get_connection #undef evhttp_request_get_evhttp_uri #undef evhttp_request_get_host #undef evhttp_request_get_input_buffer #undef evhttp_request_get_input_headers #undef evhttp_request_get_output_buffer #undef evhttp_request_get_output_headers #undef evhttp_request_get_response_code #undef evhttp_request_get_uri #undef evhttp_request_is_owned #undef evhttp_request_new #undef evhttp_request_own #undef evhttp_request_set_chunked_cb #undef evhttp_response_code #undef evhttp_send_error #undef evhttp_send_page #undef evhttp_send_reply #undef evhttp_send_reply_chunk #undef evhttp_send_reply_end #undef evhttp_send_reply_start #undef evhttp_set_allowed_methods #undef evhttp_set_cb #undef evhttp_set_gencb #undef evhttp_set_max_body_size #undef evhttp_set_max_headers_size #undef evhttp_set_timeout #undef evhttp_start #undef evhttp_start_read #undef evhttp_uridecode #undef 
evhttp_uriencode #undef evhttp_uri_free #undef evhttp_uri_get_fragment #undef evhttp_uri_get_host #undef evhttp_uri_get_path #undef evhttp_uri_get_port #undef evhttp_uri_get_query #undef evhttp_uri_get_scheme #undef evhttp_uri_get_userinfo #undef evhttp_uri_join #undef evhttp_uri_new #undef evhttp_uri_parse #undef evhttp_uri_parse_with_flags #undef evhttp_uri_set_flags #undef evhttp_uri_set_fragment #undef evhttp_uri_set_host #undef evhttp_uri_set_path #undef evhttp_uri_set_port #undef evhttp_uri_set_query #undef evhttp_uri_set_scheme #undef evhttp_uri_set_userinfo #undef evdns_add_server_port #undef evdns_add_server_port_with_base #undef evdns_base_clear_nameservers_and_suspend #undef evdns_base_count_nameservers #undef evdns_base_free #undef evdns_base_load_hosts #undef evdns_base_nameserver_add #undef evdns_base_nameserver_ip_add #undef evdns_base_nameserver_sockaddr_add #undef evdns_base_new #undef evdns_base_resolv_conf_parse #undef evdns_base_resolve_ipv4 #undef evdns_base_resolve_ipv6 #undef evdns_base_resolve_reverse #undef evdns_base_resolve_reverse_ipv6 #undef evdns_base_resume #undef evdns_base_search_add #undef evdns_base_search_clear #undef evdns_base_search_ndots_set #undef evdns_base_set_option #undef evdns_cancel_request #undef evdns_clear_nameservers_and_suspend #undef evdns_close_server_port #undef evdns_count_nameservers #undef evdns_err_to_string #undef evdns_getaddrinfo #undef evdns_getaddrinfo_cancel #undef evdns_get_global_base #undef evdns_init #undef evdns_nameserver_add #undef evdns_nameserver_ip_add #undef evdns_resolv_conf_parse #undef evdns_resolve_ipv4 #undef evdns_resolve_ipv6 #undef evdns_resolve_reverse #undef evdns_resolve_reverse_ipv6 #undef evdns_resume #undef evdns_search_add #undef evdns_search_clear #undef evdns_search_ndots_set #undef evdns_server_request_add_aaaa_reply #undef evdns_server_request_add_a_reply #undef evdns_server_request_add_cname_reply #undef evdns_server_request_add_ptr_reply #undef 
evdns_server_request_add_reply #undef evdns_server_request_drop #undef evdns_server_request_get_requesting_addr #undef evdns_server_request_respond #undef evdns_server_request_set_flags #undef evdns_set_log_fn #undef evdns_set_option #undef evdns_set_random_bytes_fn #undef evdns_set_transaction_id_fn #undef evdns_shutdown #undef evrpc_add_hook #undef evrpc_free #undef evrpc_get_reply #undef evrpc_get_request #undef evrpc_hook_add_meta #undef evrpc_hook_find_meta #undef evrpc_hook_get_connection #undef evrpc_init #undef evrpc_make_request #undef evrpc_make_request_ctx #undef evrpc_pool_add_connection #undef evrpc_pool_free #undef evrpc_pool_new #undef evrpc_pool_remove_connection #undef evrpc_pool_set_timeout #undef evrpc_register_generic #undef evrpc_register_rpc #undef evrpc_remove_hook #undef evrpc_reqstate_free #undef evrpc_request_done #undef evrpc_request_get_pool #undef evrpc_request_set_cb #undef evrpc_request_set_pool #undef evrpc_resume_request #undef evrpc_send_request_generic #undef evrpc_unregister_rpc ================================================ FILE: src/graphlab/rpc/fiber_async_consensus.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
*
* For more about this software visit:
*
*      http://www.graphlab.ml.cmu.edu
*
*/

#include <graphlab/rpc/fiber_async_consensus.hpp>
#include <graphlab/parallel/fiber_control.hpp>

namespace graphlab {

// Constructor.  All fibers are assumed to be initially active, so
// numactive starts at the full fiber count.  Machine 0 starts holding
// the token, and last_change is seeded to the *last* machine so that the
// token must travel one full loop before completion can be declared.
fiber_async_consensus::fiber_async_consensus(distributed_control &dc,
                                             size_t required_fibers_in_done,
                                             const dc_impl::dc_dist_object_base *attach)
  :rmi(dc, this), attachedobj(attach),
   last_calls_sent(0), last_calls_received(0),
   numactive(required_fibers_in_done), ncpus(required_fibers_in_done),
   done(false), trying_to_sleep(0),
   critical(ncpus, 0), sleeping(ncpus, 0),
   hastoken(dc.procid() == 0), cond(ncpus, 0){
  cur_token.total_calls_sent = 0;
  cur_token.total_calls_received = 0;
  cur_token.last_change = (procid_t)(rmi.numprocs() - 1);
}

// Restores the object to its freshly constructed state so it can be
// reused for another consensus round.  Per the header, this must be
// called by exactly one thread per machine while no consensus is in
// progress; no locking is done here.
void fiber_async_consensus::reset() {
  last_calls_sent = 0;
  last_calls_received = 0;
  numactive = ncpus;
  done = false;
  trying_to_sleep = false;
  critical = std::vector<char>(ncpus, 0);
  sleeping = std::vector<char>(ncpus, 0);
  hastoken = (rmi.procid() == 0);
  cur_token.total_calls_sent = 0;
  cur_token.total_calls_received = 0;
  cur_token.last_change = (procid_t)(rmi.numprocs() - 1);
}

// Unconditionally flags completion and wakes every sleeping fiber.
// Also invoked remotely (via control_call in pass_the_token) when some
// machine detects global termination.
void fiber_async_consensus::force_done() {
  m.lock();
  done = true;
  m.unlock();
  cancel();
}

// Enters the critical section: marks this fiber as attempting to sleep
// and acquires the mutex.  Must be paired with either
// end_done_critical_section() or cancel_critical_section().
void fiber_async_consensus::begin_done_critical_section(size_t cpuid) {
  trying_to_sleep.inc();
  critical[cpuid] = true;
  m.lock();
}

// Leaves the critical section without sleeping (local work was found
// after begin_done_critical_section()).  Undoes begin's bookkeeping.
void fiber_async_consensus::cancel_critical_section(size_t cpuid) {
  m.unlock();
  critical[cpuid] = false;
  trying_to_sleep.dec();
}

// Commits to sleeping inside the critical section.  Blocks (by
// descheduling the fiber) until woken by cancel()/cancel_one() or until
// global consensus is reached.  Returns true iff consensus was achieved.
bool fiber_async_consensus::end_done_critical_section(size_t cpuid) {
  // if done flag is set, quit immediately
  if (done) {
    m.unlock();
    critical[cpuid] = false;
    trying_to_sleep.dec();
    return true;
  }
  /*
    Assertion: Since numactive is decremented only within a critical
    section, and is incremented only within the same critical section.
    Therefore numactive is a valid counter of the number of fibers
    outside of this critical section.
  */
  --numactive;
  /*
    Assertion: If numactive is ever 0 at this point, the algorithm is
    done.  WLOG, let the current fiber which just decremented numactive
    be fiber 0.  Since there is only 1 active fiber (0), there must be no
    fibers performing insertions, and are no other fibers which are
    waking up.  All fibers must therefore be sleeping.
  */
  if (numactive == 0) {
    logstream(LOG_INFO) << rmi.procid() << ": Termination Possible" << std::endl;
    if (hastoken) pass_the_token();
  }
  sleeping[cpuid] = true;
  while(1) {
    // here we are protected by the mutex again.
    // woken up by someone else. leave the
    // terminator
    if (sleeping[cpuid] == false || done) {
      break;
    }
    // put myself to sleep
    // this here is basically cond[cpuid].wait(m);
    // cond[cpuid] records the fiber id so that a waker can reschedule us.
    cond[cpuid] = fiber_control::get_tid();
    ASSERT_NE(cond[cpuid], 0);
    fiber_control::deschedule_self(&m.m_mut);
    m.lock();
    cond[cpuid] = 0;
  }
  m.unlock();
  critical[cpuid] = false;
  trying_to_sleep.dec();
  return done;
}

// Wakes up every locally sleeping fiber (e.g. because new work arrived).
void fiber_async_consensus::cancel() {
  /*
    Assertion: numactive > 0 if there is work to do.
    If there are fibers trying to sleep, lets wake them up
  */
  if (trying_to_sleep > 0 || numactive < ncpus) {
    m.lock();
    size_t oldnumactive = numactive;
    // once I acquire this lock, all fibers must be
    // in the following states
    // 1: still running and has not reached begin_critical_section()
    // 2: is sleeping in cond.wait()
    // 3: has called begin_critical_section() but has not acquired
    //    the mutex
    // In the case of 1,3: These fibers will perform one more sweep
    // of their task queues. Therefore they will see any new job if available
    // in the case of 2: numactive must be < ncpus since numactive
    // is mutex protected. Then I can wake them up by
    // clearing their sleeping flags and broadcasting.
    if (numactive < ncpus) {
      // this is safe. Note that it is done from within
      // the critical section.
      for (size_t i = 0;i < ncpus; ++i) {
        numactive += sleeping[i];
        if (sleeping[i]) {
          sleeping[i] = 0;
          // this here was basically cond[i].signal();
          if (cond[i] != 0) fiber_control::schedule_tid(cond[i]);
        }
      }
      if (oldnumactive == 0 && !done) {
        logstream(LOG_INFO) << rmi.procid() << ": Waking" << std::endl;
      }
    }
    m.unlock();
  }
}

// Wakes up one specific fiber (cpuhint) if it is currently inside the
// critical section / sleeping.  Cheaper than cancel() when the caller
// knows which fiber should handle the new work.
void fiber_async_consensus::cancel_one(size_t cpuhint) {
  if (critical[cpuhint]) {
    m.lock();
    size_t oldnumactive = numactive;
    // see new_job() for detailed comments
    if (sleeping[cpuhint]) {
      numactive += sleeping[cpuhint];
      sleeping[cpuhint] = 0;
      if (oldnumactive == 0 && !done) {
        logstream(LOG_INFO) << rmi.procid() << ": Waking" << std::endl;
      }
      // this here was basically cond[cpuhint].signal();
      if (cond[cpuhint] != 0) fiber_control::schedule_tid(cond[cpuhint]);
    }
    m.unlock();
  }
}

// RPC target: accepts the ring token from the previous machine.  If this
// machine is also fully idle (numactive == 0) the token is forwarded
// immediately; otherwise it is held until the local fibers go idle.
void fiber_async_consensus::receive_the_token(token &tok) {
  m.lock();
  // save the token
  hastoken = true;
  cur_token = tok;
  // if I am waiting on done, pass the token.
  logstream(LOG_INFO) << rmi.procid() << ": Token Received" << std::endl;
  if (numactive == 0) {
    pass_the_token();
  }
  m.unlock();
}

// Forwards the token to the next machine in the ring, or declares global
// completion if the token made a full loop with no change in the
// sent/received call counts (Misra's marker algorithm).
void fiber_async_consensus::pass_the_token() {
  // note that this function does not acquire the token lock
  // the caller must acquire it
  assert(hastoken);
  // first check if we are done
  if (cur_token.last_change == rmi.procid() &&
      cur_token.total_calls_received == cur_token.total_calls_sent) {
    logstream(LOG_INFO) << "Completed Token: "
                        << cur_token.total_calls_received << " "
                        << cur_token.total_calls_sent << std::endl;
    // we have completed a loop around!
    // broadcast a completion
    for (procid_t i = 0;i < rmi.numprocs(); ++i) {
      if (i != rmi.procid()) {
        rmi.control_call(i, &fiber_async_consensus::force_done);
      }
    }
    // set the complete flag
    // we can't call consensus() since it will deadlock
    done = true;
    // this is the same code as cancel(), but we can't call cancel
    // since we are holding on to a lock
    if (numactive < ncpus) {
      // this is safe. Note that it is done from within
      // the critical section.
      for (size_t i = 0;i < ncpus; ++i) {
        numactive += sleeping[i];
        if (sleeping[i]) {
          sleeping[i] = 0;
          // this here is basically cond[i].signal();
          size_t ch = cond[i];
          if (ch != 0) fiber_control::schedule_tid(ch);
        }
      }
    }
  }
  else {
    // update the token
    size_t callsrecv;
    size_t callssent;
    if (attachedobj) {
      callsrecv = attachedobj->calls_received();
      callssent = attachedobj->calls_sent();
    }
    else {
      callsrecv = rmi.dc().calls_received();
      callssent = rmi.dc().calls_sent();
    }
    if (callssent != last_calls_sent ||
        callsrecv != last_calls_received) {
      cur_token.total_calls_sent += callssent - last_calls_sent;
      cur_token.total_calls_received += callsrecv - last_calls_received;
      cur_token.last_change = rmi.procid();
    }
    //std::cout << "Sending token: (" << cur_token.total_calls_sent
    //<< ", " << cur_token.total_calls_received << ")" << std::endl;
    last_calls_sent = callssent;
    last_calls_received = callsrecv;
    // send it along.
    hastoken = false;
    logstream(LOG_INFO) << "Passing Token " << rmi.procid() << "-->"
                        << (rmi.procid() + 1) % rmi.numprocs() << ": "
                        << cur_token.total_calls_received << " "
                        << cur_token.total_calls_sent << std::endl;
    rmi.control_call((procid_t)((rmi.procid() + 1) % rmi.numprocs()),
                     &fiber_async_consensus::receive_the_token,
                     cur_token);
  }
}

} // namespace graphlab

================================================
FILE: src/graphlab/rpc/fiber_async_consensus.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.
See the License for the specific language
 *  governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef FIBER_ASYNC_TERMINATOR_HPP
#define FIBER_ASYNC_TERMINATOR_HPP

#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/rpc/dc.hpp>
#include <graphlab/rpc/dc_dist_object_base.hpp>
#include <graphlab/rpc/dc_dist_object.hpp>

namespace graphlab {
  /**
   * \ingroup rpc
   * \brief This implements a distributed consensus algorithm which waits
   * for global completion of all computation/RPC events on a given object.
   *
   * The use case is as follows:
   *
   * A collection of fibers on a collection of distributed machines, each
   * running the following
   *
   * \code
   * while (work to be done) {
   *   Do_stuff
   * }
   * \endcode
   *
   * However, <tt>Do_stuff</tt> will include RPC calls which may introduce
   * work to other fibers/machines.  Figuring out when termination is
   * possible is complex. For instance RPC calls could be in-flight and not
   * yet received. This fiber_async_consensus class implements a solution
   * built around the algorithm in
   * <i>Misra, J.: Detecting Termination of Distributed Computations Using
   * Markers, SIGOPS, 1983</i>
   * extended to handle the mixed parallelism (distributed with threading)
   * case.
   *
   * The main loop of the user has to be modified to:
   *
   * \code
   * done = false;
   * while (!done) {
   *   Do_stuff
   *   // if locally, I see that there is no work to be done
   *   // we begin the consensus checking
   *   if (no work to be done) {
   *     // begin the critical section and try again
   *     consensus.begin_done_critical_section();
   *     // if still no work to be done
   *     if (no work to be done) {
   *       done = consensus.end_done_critical_section();
   *     }
   *     else {
   *       consensus.cancel_critical_section();
   *     }
   *   }
   * }
   * \endcode
   *
   * Additionally, incoming RPC calls which create work must ensure there
   * are active fibers which are capable of processing the work. An easy
   * solution will be to simply cancel_one().  Other more optimized
   * solutions include keeping a counter of the number of active fibers,
   * and only calling cancel() or cancel_one() if all fibers are asleep.
   * (Note that the optimized solution does require some care to ensure
   * dead-lock free execution).
   *
   * This class works with fibers. For a version which works with regular
   * kernel threads see \ref graphlab::async_consensus .
   *
   * \see graphlab::async_consensus
   */
  class fiber_async_consensus {
  public:
    /** \brief Constructs an asynchronous consensus object
     *
     * The consensus procedure waits till all fibers have no work to do and
     * are waiting in consensus, and there is no communication going on
     * which could wake up a thread. The consensus object is therefore
     * associated with a communication context, either a
     * graphlab::dc_dist_object, or the global context (the root
     * distributed_control).
     *
     * \param dc The distributed control object to use for communication
     * \param required_fibers_in_done local consensus is achieved if this
     *                                many fibers are waiting for consensus
     *                                locally.
     * \param attach The context to associate with. If NULL, we associate
     *               with the global context.
     */
    fiber_async_consensus(distributed_control &dc,
                          size_t required_fibers_in_done = 1,
                          const dc_impl::dc_dist_object_base* attach = NULL);

    /**
     * \brief A thread enters the critical section by calling
     * this function.
     *
     * After which it should check its termination condition locally. If
     * termination condition is still fulfilled,
     * end_done_critical_section() should be called.  Otherwise
     * cancel_critical_section() should be called.
     *
     * \param cpuid Thread ID of the thread.
     */
    void begin_done_critical_section(size_t cpuid);

    /**
     * \brief Leaves the critical section because termination condition
     * is not fulfilled.
     *
     * See begin_done_critical_section()
     * \param cpuid Thread ID of the thread.
     */
    void cancel_critical_section(size_t cpuid);

    /**
     * \brief Thread must call this function if termination condition
     * is fulfilled while in the critical section.
     *
     * See begin_done_critical_section()
     *
     * \param cpuid Thread ID of the thread.
     * \returns True if global consensus is achieved. False otherwise.
     */
    bool end_done_critical_section(size_t cpuid);

    /**
     * \brief Forces the consensus to be set
     */
    void force_done();

    /// \brief Wakes up all local fibers waiting in done()
    void cancel();

    /// \brief Wakes up a specific thread waiting in done()
    void cancel_one(size_t cpuid);

    /// \brief Returns true if consensus is achieved.
    bool is_done() const {
      return done;
    }

    /** \brief Resets the consensus object. Must be called simultaneously
     * by exactly one thread on each machine.
     * This function is not safe to call while consensus is being achieved.
     */
    void reset();

  private:
    /**
     * The token object that is passed around the machines.
     * It counts the total number of RPC calls that has been sent
     * or received, as well as the machine which last changed the value.
     * When the token goes one full round with no change, consensus is
     * achieved.
     */
    struct token {
      size_t total_calls_sent;
      size_t total_calls_received;
      procid_t last_change;
      void save(oarchive &oarc) const {
        oarc << total_calls_sent << total_calls_received << last_change;
      }

      void load(iarchive &iarc) {
        iarc >> total_calls_sent >> total_calls_received >> last_change;
      }
    };

    dc_dist_object<fiber_async_consensus> rmi;
    // Context whose call counters are consulted; NULL means use the
    // global distributed_control counters.
    const dc_impl::dc_dist_object_base* attachedobj;

    // Call-counter snapshots taken the last time the token was forwarded.
    size_t last_calls_sent;
    size_t last_calls_received;

    /// counts the number of fibers which are not sleeping
    /// protected by the mutex
    size_t numactive;
    /// Total number of CPUs
    size_t ncpus;
    /// once flag is set, the terminator is invalid, and all fibers
    /// should leave
    bool done;

    /// set if abort() is called
    // bool forced_abort;

    /// Number of fibers which have called
    /// begin_critical_section(), and have not left end_critical_section()
    /// This is an atomic counter and is not protected.
    atomic<size_t> trying_to_sleep;
    /// critical[i] is set if thread i has called
    /// begin_critical_section(), but has not left end_critical_section()
    /// sum of critical should be the same as trying_to_sleep
    std::vector<char> critical;
    /// sleeping[i] is set if fibers[i] is in cond.wait()
    std::vector<char> sleeping;

    // True while this machine holds the ring token.
    bool hastoken;
    /// If I have the token, the value of the token
    token cur_token;

    // Guards numactive, sleeping, done, hastoken and cur_token.
    mutex m;
    /*
     * Now, to work with fibers, the basic trick here is that the
     * async_consensus implementation uses exactly one thread waiting on
     * each condition variable. As such, we can just replace this with a
     * deschedule operation.
     *
     * We set cond[cpuid] to the fiber ID if there is a thread
     * waiting on it, and 0 otherwise.
     */
    std::vector<size_t> cond;

    void receive_the_token(token &tok);
    void pass_the_token();
  };
}
#endif

================================================
FILE: src/graphlab/rpc/fiber_buffered_exchange.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.  See the License for the specific language
 *  governing permissions and limitations under the License.
*
* For more about this software visit:
*
*      http://www.graphlab.ml.cmu.edu
*
*/

#ifndef GRAPHLAB_FIBER_BUFFERED_EXCHANGE_HPP
#define GRAPHLAB_FIBER_BUFFERED_EXCHANGE_HPP

#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/parallel/fiber_control.hpp>
#include <graphlab/rpc/dc.hpp>
#include <graphlab/rpc/dc_dist_object.hpp>
#include <graphlab/util/mpi_tools.hpp>
#include <graphlab/macros_def.hpp>

namespace graphlab {

  /**
   * \ingroup rpc
   *
   * The buffered exchange provides high performance exchange of bulk data
   * between machines. This is like the \ref graphlab::buffered_exchange,
   * but is much stricter. All send calls must occur within fibers, and all
   * recv calls must occur within fibers. Specifically, a collection of
   * "fiber-worker-local" send buffer and receive buffers are created.
   * Sends by fibers write into the buffer owned by the current worker
   * handling the fiber. Receives similarly read from the buffer owned by
   * the current worker handling the fiber. As such a bit more subtlety is
   * needed to use this class correctly.
   *
   * For instance, if we are doing bulk exchanges of integers:
   * \code
   * buffered_exchange<int> exchange(dc);
   * .. In parallel in fibers .. {
   *   exchange.send([target machine], [value to send to target])
   *   exchange.partial_flush();
   * }
   *
   * .. now in 1 thread ..
   * exchange.flush()
   *
   * .. In parallel in fibers .. {
   *   procid_t proc;
   *   std::vector<buffered_exchange<int>::buffer_record> buffer; // (an array of buffers)
   *   while(exchange.recv(buffer)) {
   *     for each buffer_record in buffer:
   *       buffer_record.proc is the machine which sent the contents of this record
   *       buffer_record.buffer is an array containing values sent by the machine buffer_record.proc
   *   }
   * }
   *
   * .. now in 1 thread ..
   * exchange.recv(buffer, false); // get from all receive buffers
   * while(exchange.recv(buffer)) {
   *   for each buffer_record in buffer:
   *     buffer_record.proc is the machine which sent the contents of this record
   *     buffer_record.buffer is an array containing values sent by the machine buffer_record.proc
   * }
   * \endcode
   *
   * \note The buffered exchange sends data in the background, so recv can
   * be called even before the flush calls.
   * \note The last single threaded receive is not necessary if
   * worker-affinity is set correctly so that every worker is active in the
   * parallel receiving block.
   *
   * \see graphlab::buffered_exchange
   */
  template<typename T>
  class fiber_buffered_exchange {
  public:
    // A batch of values of type T destined for / received from one machine.
    typedef std::vector<T> buffer_type;

    struct buffer_record {
      procid_t proc;     // machine which sent this buffer's contents
      buffer_type buffer;
      buffer_record() : proc(-1)  { }
    }; // end of buffer record

    typedef std::vector<buffer_record> recv_buffer_type;

    // Guards recv_buffers.  Public so callers could coordinate if needed.
    mutex lock;
  private:
    /** The rpc interface for this class */
    mutable dc_dist_object<fiber_buffered_exchange> rpc;

    // recv_buffers[wid] holds records received and assigned to worker wid.
    std::vector<std::vector< buffer_record> > recv_buffers;

    // One in-progress outgoing split-call archive per (worker, target).
    struct send_record {
      oarchive* oarc;     // NULL when no buffer is currently open
      size_t numinserts;  // number of T values serialized into oarc
    };
    // send_buffers[wid][proc] is worker wid's pending buffer for machine proc.
    std::vector<std::vector<send_record> > send_buffers;

    // Flush threshold (bytes) per (worker, target) send buffer.
    const size_t max_buffer_size;

    /**
     * Flushes the send buffer local to worker id "wid" and going to
     * process proc
     */
    void flush_buffer(size_t wid, procid_t proc) {
      if(send_buffers[wid][proc].oarc) {
        // write the insert count at the end of the buffer we are sending;
        // rpc_recv reads it back from the last sizeof(size_t) bytes.
        send_buffers[wid][proc].oarc->write(reinterpret_cast<char*>(&send_buffers[wid][proc].numinserts),
                                            sizeof(size_t));
        rpc.split_call_end(proc, send_buffers[wid][proc].oarc);
        // logstream(LOG_DEBUG) << rpc.procid() << ": Sending exchange of length "
        //                      << send_buffers[wid][proc].oarc->off << " to "
        //                      << proc << std::endl;
        send_buffers[wid][proc].oarc = NULL;
        send_buffers[wid][proc].numinserts = 0;
      }
    }

  public:
    /**
     * Constructs a buffered exchange object.
     *
     * \ref dc The master distributed_control object
     * \ref max_buffer_size The size of the per thread and per target send
     * buffer.
     */
    fiber_buffered_exchange(distributed_control& dc,
                            const size_t max_buffer_size = DEFAULT_BUFFERED_EXCHANGE_SIZE)
      : rpc(dc, this), max_buffer_size(max_buffer_size) {
      // one send/recv buffer set per fiber worker
      send_buffers.resize(fiber_control::get_instance().num_workers());
      recv_buffers.resize(fiber_control::get_instance().num_workers());
      for (size_t i = 0;i < send_buffers.size(); ++i) {
        send_buffers[i].resize(dc.numprocs());
        for (size_t j = 0;j < send_buffers[i].size(); ++j) {
          send_buffers[i][j].oarc = NULL;
          send_buffers[i][j].numinserts = 0;
        }
      }
      rpc.barrier();
    }

    ~fiber_buffered_exchange() {
      // clear the send buffers: cancel any split calls left open
      for (size_t i = 0;i < send_buffers.size(); ++i) {
        for (size_t j = 0;j < send_buffers[i].size(); ++j) {
          if (send_buffers[i][j].oarc) rpc.split_call_cancel(send_buffers[i][j].oarc);
        }
      }
    }

    // fiber_buffered_exchange(distributed_control& dc, handler_type recv_handler,
    //                         size_t buffer_size = 1000) :
    //   rpc(dc, this), send_buffers(dc.numprocs()), send_locks(dc.numprocs()),
    //   max_buffer_size(buffer_size), recv_handler(recv_handler) { rpc.barrier(); }

    /**
     * Sends a value to a target machine.
     * Must be called from within a fiber
     */
    void send(const procid_t proc, const T& value) {
      size_t wid = fiber_control::get_worker_id();
      if (send_buffers[wid][proc].oarc == NULL) {
        // lazily open a split call and stamp it with our procid header
        send_buffers[wid][proc].oarc = rpc.split_call_begin(&fiber_buffered_exchange::rpc_recv);
        // write a header
        (*send_buffers[wid][proc].oarc) << rpc.procid();
        send_buffers[wid][proc].numinserts = 0;
      }
      (*(send_buffers[wid][proc].oarc)) << value;
      ++send_buffers[wid][proc].numinserts;
      if(send_buffers[wid][proc].oarc->off >= max_buffer_size) {
        flush_buffer(wid, proc);
      }
    } // end of send

    /**
     * Flushes the send buffers owned by the worker currently running the
     * current fiber.
     */
    void partial_flush() {
      for(procid_t proc = 0; proc < rpc.numprocs(); ++proc) {
        flush_buffer(fiber_control::get_worker_id(), proc);
      }
    }

    /**
     * Flushes all send buffers. Must be called only on one thread.
     * Will not return until all machines call flush.
     */
    void flush() {
      for(size_t i = 0; i < send_buffers.size(); ++i) {
        for (size_t j = 0;j < send_buffers[i].size(); ++j) {
          flush_buffer(i,j);
        }
      }
      rpc.dc().flush();
      rpc.full_barrier();
    } // end of flush

    /**
     * Receives a collection of buffers.
     * Must be called from within a fiber.
     * \param ret_buffer If return value is true, this contains a
     *        collection of buffers sent to this machine.
     * \param self_buffer If true, only receives the worker local buffers.
     * \returns true If ret_buffer contains values.
     */
    bool recv(std::vector<buffer_record>& ret_buffer,
              const bool self_buffer = true) {
      fiber_control::fast_yield();
      ret_buffer.clear();
      bool success = false;
      if (self_buffer) {
        // get from my own buffer first; fall back to stealing from any
        // other worker's buffer.  Done under the lock since other workers
        // may be receiving concurrently.
        size_t wid = fiber_control::get_worker_id();
        lock.lock();
        if(!recv_buffers[wid].empty()) {
          success = true;
          std::swap(ret_buffer, recv_buffers[wid]);
        } else {
          for (size_t i = 0;i < recv_buffers.size(); ++i) {
            if(!recv_buffers[i].empty()) {
              success = true;
              std::swap(ret_buffer, recv_buffers[i]);
              break;
            }
          }
        }
        lock.unlock();
      } else {
        // single-threaded drain of all buffers; intentionally unlocked
        // (see class docs: this path is used from exactly one thread)
        for (size_t i = 0;i < recv_buffers.size(); ++i) {
          if(!recv_buffers[i].empty()) {
            success = true;
            std::swap(ret_buffer, recv_buffers[i]);
            break;
          }
        }
      }
      return success;
    } // end of recv

    /**
     * Returns the number of elements avalable for receiving.
     */
    size_t size() const {
      size_t count = 0;
      for (size_t i = 0;i < recv_buffers.size(); ++i) {
        count += recv_buffers[i].size();
      }
      return count;
    } // end of size

    /**
     * Returns true if there are no elements to receive.
     */
    bool empty() const {
      for (size_t i = 0;i < recv_buffers.size(); ++i) {
        if (recv_buffers[i].size() > 0) return false;
      }
      return true;
    }

    // No-op: receive buffers are drained by recv(); nothing to clear.
    void clear() { }

    void barrier() {
      rpc.barrier();
    }

  private:
    // Split-call receive handler.  Deserializes the sender's procid
    // header, then reads the trailing insert count from the last
    // sizeof(size_t) bytes, then deserializes that many T values and
    // appends a record to this worker's receive buffer.
    void rpc_recv(size_t len, wild_pointer w) {
      buffer_type tmp;
      iarchive iarc(reinterpret_cast<const char*>(w.ptr), len);
      // first desrialize the source process
      procid_t src_proc;
      iarc >> src_proc;
      // logstream(LOG_DEBUG) << rpc.procid() << ": Receiving exchange of length "
      //                      << len << " from " << src_proc << std::endl;
      // create an iarchive which just points to the last size_t bytes
      // to get the number of elements
      iarchive numel_iarc(reinterpret_cast<const char*>(w.ptr) + len - sizeof(size_t),
                          sizeof(size_t));
      size_t numel = 0;
      numel_iarc.read(reinterpret_cast<char*>(&numel), sizeof(size_t));
      //std::cout << "Receiving: " << numel << "\n";
      tmp.resize(numel);
      for (size_t i = 0;i < numel; ++i) {
        iarc >> tmp[i];
      }
      size_t wid = fiber_control::get_worker_id();
      lock.lock();
      recv_buffers[wid].push_back(buffer_record());
      buffer_record& rec = recv_buffers[wid].back();
      rec.proc = src_proc;
      rec.buffer.swap(tmp);
      lock.unlock();
    } // end of rpc rcv
  }; // end of buffered exchange
}; // end of graphlab namespace

#include <graphlab/macros_undef.hpp>
#endif

================================================
FILE: src/graphlab/rpc/function_arg_types_def.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.
See the License for the specific language
 *  governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifdef __GLRPC_F0
#error "multiple includes of function arg types"
#endif

#include <boost/type_traits/decay.hpp>
#include <boost/type_traits/remove_pointer.hpp>
#include <boost/type_traits/remove_const.hpp>
#include <boost/type_traits/function_traits.hpp>
#include <boost/function.hpp>

/*
  A huge collection of useful typedefs.

  F0... F5: identifies the arguments for an RPC aware function F.
            Dropping const and dropping references (therefore allowing you
            to use F0....F5 to do casting.

  __GLRPC_NIF0... __GLRPC_NIF5: identifies the arguments for an RPC
            unaware function F

  __GLRPC_R0.... __GLRPC_R7: Identifies the actual arguments of the
            function F, without de-consting and de-reffing

  __GLRPC_FRESULT: de-const and de-refed type of F's return type

  __GLRPC_FARITY: the number of arguments F takes
*/

// Strips const and reference qualifiers from a type expression.
#define REMOVE_CONST_REF(REF) typename boost::remove_const<typename boost::remove_reference<REF>::type>::type

// non-intrusive calls: argN of F itself (F's args map 1:1 to RPC args)
#define __GLRPC_NIF0 REMOVE_CONST_REF(typename boost::function<typename boost::remove_pointer<F>::type>::arg1_type)
#define __GLRPC_NIF1 REMOVE_CONST_REF(typename boost::function<typename boost::remove_pointer<F>::type>::arg2_type)
#define __GLRPC_NIF2 REMOVE_CONST_REF(typename boost::function<typename boost::remove_pointer<F>::type>::arg3_type)
#define __GLRPC_NIF3 REMOVE_CONST_REF(typename boost::function<typename boost::remove_pointer<F>::type>::arg4_type)
#define __GLRPC_NIF4 REMOVE_CONST_REF(typename boost::function<typename boost::remove_pointer<F>::type>::arg5_type)
#define __GLRPC_NIF5 REMOVE_CONST_REF(typename boost::function<typename boost::remove_pointer<F>::type>::arg6_type)
#define __GLRPC_NIF6 REMOVE_CONST_REF(typename boost::function<typename boost::remove_pointer<F>::type>::arg7_type)
#define __GLRPC_NIF7 REMOVE_CONST_REF(typename boost::function<typename boost::remove_pointer<F>::type>::arg8_type)

// for rpc aware functions: the first 2 formal args (dc, source) are
// skipped, so RPC arg K maps to F's arg K+2
#define __GLRPC_F0 __GLRPC_NIF2
#define __GLRPC_F1 __GLRPC_NIF3
#define __GLRPC_F2 __GLRPC_NIF4
#define __GLRPC_F3 __GLRPC_NIF5
#define __GLRPC_F4 __GLRPC_NIF6
#define __GLRPC_F5 __GLRPC_NIF7

// Raw argument types of F, const/reference qualifiers preserved.
#define __GLRPC_R0 typename boost::function<typename boost::remove_pointer<F>::type>::arg1_type
#define __GLRPC_R1 typename boost::function<typename boost::remove_pointer<F>::type>::arg2_type
#define __GLRPC_R2 typename boost::function<typename boost::remove_pointer<F>::type>::arg3_type
#define __GLRPC_R3 typename boost::function<typename boost::remove_pointer<F>::type>::arg4_type
#define __GLRPC_R4 typename boost::function<typename boost::remove_pointer<F>::type>::arg5_type
#define __GLRPC_R5 typename boost::function<typename boost::remove_pointer<F>::type>::arg6_type
#define __GLRPC_R6 typename boost::function<typename boost::remove_pointer<F>::type>::arg7_type
#define __GLRPC_R7 typename boost::function<typename boost::remove_pointer<F>::type>::arg8_type

#define __GLRPC_FRESULT REMOVE_CONST_REF(typename boost::function<typename boost::remove_pointer<F>::type>::result_type)

#define __GLRPC_FARITY boost::function<typename boost::remove_pointer<F>::type>::arity

================================================
FILE: src/graphlab/rpc/function_arg_types_undef.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.
See the License for the specific language
 *  governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

// Undoes every macro defined in function_arg_types_def.hpp so the pair
// can be re-included by other translation units.
#undef __GLRPC_F0
#undef __GLRPC_F1
#undef __GLRPC_F2
#undef __GLRPC_F3
#undef __GLRPC_F4
#undef __GLRPC_F5
#undef __GLRPC_NIF0
#undef __GLRPC_NIF1
#undef __GLRPC_NIF2
#undef __GLRPC_NIF3
#undef __GLRPC_NIF4
#undef __GLRPC_NIF5
#undef __GLRPC_NIF6
#undef __GLRPC_NIF7
#undef __GLRPC_R0
#undef __GLRPC_R1
#undef __GLRPC_R2
#undef __GLRPC_R3
#undef __GLRPC_R4
#undef __GLRPC_R5
#undef __GLRPC_R6
#undef __GLRPC_R7
#undef __GLRPC_FRESULT
#undef REMOVE_CONST_REF
#undef __GLRPC_FARITY

================================================
FILE: src/graphlab/rpc/function_broadcast_issue.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an "AS
 *  IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 *  express or implied.  See the License for the specific language
 *  governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef FUNCTION_BROADCAST_ISSUE_HPP
#define FUNCTION_BROADCAST_ISSUE_HPP
#include <iostream>
#include <graphlab/serialization/serialization_includes.hpp>
#include <graphlab/rpc/dc_types.hpp>
#include <graphlab/rpc/dc_internal_types.hpp>
#include <graphlab/rpc/dc_send.hpp>
#include <graphlab/rpc/dc_thread_get_send_buffer.hpp>
#include <graphlab/rpc/function_call_dispatch.hpp>
#include <graphlab/rpc/function_call_issue.hpp>
#include <graphlab/rpc/is_rpc_call.hpp>
#include <boost/preprocessor.hpp>
#include <graphlab/rpc/function_arg_types_def.hpp>

namespace graphlab{
namespace dc_impl {

/**
\ingroup rpc
\internal
\file function_broadcast_issue.hpp

  See function_call_issue.hpp for details.  This is equivalent to the
  macro expansion in remote_call_issue with the difference that this takes
  an iterator sequence listing the machines to send to.  The serialized
  call is built once and then copied into each target's thread-local send
  buffer.

  The code below generates the following for different number of
  arguments.  Here, we demonstrate the 1 argument version.
\code
template < typename Iterator, typename F, typename T0 > class remote_broadcast_issue1
{
public:
  static void exec (std::vector < dc_send * >&sender,
                    unsigned char flags,
                    Iterator target_begin, Iterator target_end,
                    F remote_function, const T0 & i0)
  {
    oarchive arc;
    arc.buf = (char *) malloc (65536);
    arc.len = 65536;
    size_t len = dc_send::write_packet_header (arc, _get_procid (),
                                               flags, _get_sequentialization_key ());
    uint32_t beginoff = arc.off;
    dispatch_type d = function_call_issue_detail::dispatch_selector1 <
        typename is_rpc_call < F >::type, F, T0 >::dispatchfn ();
    arc << reinterpret_cast < size_t > (d);
    arc << reinterpret_cast < size_t > (remote_function);
    arc << i0;
    *(reinterpret_cast < uint32_t * >(arc.buf + len)) = arc.off - beginoff;
    Iterator iter = target_begin;
    while (iter != target_end) {
      oarchive *buf = get_thread_local_buffer (*iter);
      buf->write (arc.buf, arc.off);
      release_thread_local_buffer (*iter, flags & CONTROL_PACKET);
      ++iter;
    }
    free (arc.buf);
  }
};
\endcode
*/

// GENARGS expands to a const-reference formal parameter "const TN &iN".
#define GENARGS(Z,N,_)  BOOST_PP_CAT(const T, N) BOOST_PP_CAT(&i, N)
// GENI expands to the argument name "iN".
#define GENI(Z,N,_) BOOST_PP_CAT(i, N)
// GENT expands to the type name "TN".
#define GENT(Z,N,_) BOOST_PP_CAT(T, N)
// GENARC serializes argument iN into the archive.
#define GENARC(Z,N,_) arc << BOOST_PP_CAT(i, N);

/**
  The dispatch_selectorN structs are used to pick between the standard
  dispatcher and the nonintrusive dispatch by checking if the function
  is a RPC style call or not.
*/
#define REMOTE_BROADCAST_ISSUE_GENERATOR(Z,N,FNAME_AND_CALL) \
  template<typename Iterator, typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
  class BOOST_PP_CAT(FNAME_AND_CALL, N) { \
  public: \
    static void exec(std::vector<dc_send*>& sender, unsigned char flags, Iterator target_begin, Iterator target_end, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
      oarchive arc; \
      arc.buf = (char*)malloc(INITIAL_BUFFER_SIZE); \
      arc.len = INITIAL_BUFFER_SIZE; \
      size_t len = dc_send::write_packet_header(arc, _get_procid(), flags, _get_sequentialization_key()); \
      uint32_t beginoff = arc.off; \
      dispatch_type d = BOOST_PP_CAT(function_call_issue_detail::dispatch_selector,N)<typename is_rpc_call<F>::type, F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T) >::dispatchfn(); \
      arc << reinterpret_cast<size_t>(d); \
      arc << reinterpret_cast<size_t>(remote_function); \
      BOOST_PP_REPEAT(N, GENARC, _) \
      *(reinterpret_cast<uint32_t*>(arc.buf + len)) = arc.off - beginoff; \
      Iterator iter = target_begin; \
      while(iter != target_end) { \
        oarchive* buf = get_thread_local_buffer(*iter); \
        buf->write(arc.buf, arc.off); \
        release_thread_local_buffer(*iter, flags & CONTROL_PACKET); \
        ++iter; \
      } \
      free(arc.buf); \
      if (flags & FLUSH_PACKET) pull_flush_soon_thread_local_buffer(); \
    }\
  };

/**
Generates a function call issue.  3rd argument is the issue name
*/
BOOST_PP_REPEAT(6, REMOTE_BROADCAST_ISSUE_GENERATOR,  remote_broadcast_issue )

#undef GENARC
#undef GENT
#undef GENI
#undef GENARGS
#undef REMOTE_BROADCAST_ISSUE_GENERATOR

} // namespace dc_impl
} // namespace graphlab

#include <graphlab/rpc/function_arg_types_undef.hpp>

#endif

================================================
FILE: src/graphlab/rpc/function_call_dispatch.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef REPACK_DISPATCH_HPP #define REPACK_DISPATCH_HPP #include <iostream> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/function_arg_types_def.hpp> #include <boost/preprocessor.hpp> namespace graphlab { namespace dc_impl { /** \ingroup rpc \internal \file function_call_dispatch.hpp This is an internal function and should not be used directly A "call" is an RPC which is performed asynchronously. There are 2 types of calls. A "basic" call calls a standard C/C++ function and does not require the function to be modified. while the "regular" call requires the first 2 arguments of the function to be "distributed_control &dc, procid_t source". A "dispatch" is a wrapper function on the receiving side of an RPC which decodes the packet and performs the function call. This scary looking piece of code is actually quite straightforward. Given function F, as well as input types T1 ... Tn it will construct an input archive and deserialize the types T1.... Tn, and call the function f with it. This code dispatches to the "intrusive" form of a function call (that is the function call must take a distributed_control and a "procid_t source" as its first 2 arguments. 
For instance, the 1 argument version of this is DISPATCH1: \code template<typename DcType, typename F , typename T0> void DISPATCH1 (DcType& dc, procid_t source, unsigned char packet_type_mask, const char* buf, size_t len) { iarchive iarc(buf, len); size_t s; iarc >> s; F f = reinterpret_cast<F>(s); T0 (f0) ; iarc >> (f0) ; f(dc, source , (f0) ); charstring_free(f0); } \endcode charstring_free is a special template function which calls free(f1) only if f1 is a character array (char*) And similarly, the non-intrusive dispatch a little below Note that the template around DcType is *deliberate*. This prevents this function from instantiating the distributed_control until as late as possible, avoiding problems with circular references. */ #define GENFN(N) BOOST_PP_CAT(__GLRPC_F, N) #define GENFN2(N) BOOST_PP_CAT(f, N) #define GENARGS(Z,N,_) (BOOST_PP_CAT(f, N)) #define GENPARAMS(Z,N,_) BOOST_PP_CAT(T, N) (BOOST_PP_CAT(f, N)) ; iarc >> (BOOST_PP_CAT(f, N)) ; #define CHARSTRINGFREE(Z,N,_) charstring_free(BOOST_PP_CAT(f, N)); #define DISPATCH_GENERATOR(Z,N,_) \ template<typename DcType, typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \ void BOOST_PP_CAT(DISPATCH,N) (DcType& dc, procid_t source, unsigned char packet_type_mask, \ const char* buf, size_t len) { \ iarchive iarc(buf, len); \ size_t s; iarc >> s; F f = reinterpret_cast<F>(s); \ BOOST_PP_REPEAT(N, GENPARAMS, _) \ f(dc, source BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ); \ BOOST_PP_REPEAT(N, CHARSTRINGFREE, _) \ } BOOST_PP_REPEAT(6, DISPATCH_GENERATOR, _) #undef GENFN #undef GENFN2 #undef GENARGS #undef GENPARAMS #undef DISPATCH_GENERATOR /** This is similar, but generates the non-intrusive version of a dispatcher. That is, the target function does not need to take "distributed_control &dc, procid_t source" as its first 2 arguments. 
template<typename DcType, typename F , typename T0> void NONINTRUSIVE_DISPATCH1(DcType& dc, procid_t source, unsigned char packet_type_mask, const char* buf, size_t len) { iarchive iarc(buf, len); size_t s; iarc >> s; F f = reinterpret_cast<F>(s); T0 (f0) ; iarc >> (f0) ; f( (f0) ); charstring_free(f0); } */ #define GENFN(N) BOOST_PP_CAT(__GLRPC_NIF, N) #define GENFN2(N) BOOST_PP_CAT(f, N) #define GENARGS(Z,N,_) (BOOST_PP_CAT(f, N)) #define GENPARAMS(Z,N,_) BOOST_PP_CAT(T, N) (BOOST_PP_CAT(f, N)) ; iarc >> (BOOST_PP_CAT(f, N)) ; #define CHARSTRINGFREE(Z,N,_) charstring_free(BOOST_PP_CAT(f, N)); #define NONINTRUSIVE_DISPATCH_GENERATOR(Z,N,_) \ template<typename DcType, typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \ void BOOST_PP_CAT(NONINTRUSIVE_DISPATCH,N) (DcType& dc, procid_t source, unsigned char packet_type_mask, \ const char* buf, size_t len) { \ iarchive iarc(buf, len); \ size_t s; iarc >> s; F f = reinterpret_cast<F>(s); \ BOOST_PP_REPEAT(N, GENPARAMS, _) \ f(BOOST_PP_ENUM(N,GENARGS ,_) ); \ BOOST_PP_REPEAT(N, CHARSTRINGFREE, _) \ } BOOST_PP_REPEAT(6, NONINTRUSIVE_DISPATCH_GENERATOR, _) #undef GENFN #undef GENFN2 #undef GENARGS #undef GENPARAMS #undef NONINTRUSIVE_DISPATCH_GENERATOR } // namespace dc_impl } // namespace graphlab #include <graphlab/rpc/function_arg_types_undef.hpp> #endif ================================================ FILE: src/graphlab/rpc/function_call_issue.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef FUNCTION_CALL_ISSUE_HPP #define FUNCTION_CALL_ISSUE_HPP #include <iostream> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/dc_send.hpp> #include <graphlab/rpc/function_call_dispatch.hpp> #include <graphlab/rpc/is_rpc_call.hpp> #include <graphlab/rpc/request_reply_handler.hpp> #include <boost/preprocessor.hpp> #include <graphlab/rpc/dc_thread_get_send_buffer.hpp> #include <graphlab/rpc/dc_compile_parameters.hpp> #include <graphlab/rpc/function_arg_types_def.hpp> namespace graphlab{ namespace dc_impl { /** \ingroup rpc \internal * \file function_call_issue.hpp This is an internal function and should not be used directly A "call" is an RPC which is performed asynchronously. There are 2 types of calls. A "basic" call calls a standard C/C++ function and does not require the function to be modified. while the "regular" call requires the first 2 arguments of the function to be "distributed_control &dc, procid_t source". An "issue" is a wrapper function on the sending side of an RPC which encodes the packet and transmits it to the other side. 
(I realized later this is called a "Marshaller") Native Call Formats \n =================== \n The format of a "call" packet is in the form of an archive and is as follows \li (dispatch_type*) -- pointer to target machine's dispatcher function \li (void*) -- pointer to target function \li fn::arg1_type -- target function's 1st argument \li fn::arg2_type -- target function's 2nd argument \li ... \li fn::argN_type -- target function's Nth argument Argument casting is deferred to as late as possible. So the data type of arguments are the data types that the caller use to call the function. A dispatcher function will be instantiated with the input types, which will then perform the type cast. The code below generates the following for different number of arguments. Here, we demonstrate the 1 argument version. \code namespace function_call_issue_detail { template < typename BoolType, typename F, typename T0 > struct dispatch_selector1 { static dispatch_type dispatchfn () { return dc_impl::NONINTRUSIVE_DISPATCH1 < distributed_control, F, T0 >; } }; template < typename F, typename T0 > struct dispatch_selector1 <boost::mpl::bool_ < true >, F, T0 > { static dispatch_type dispatchfn () { return dc_impl::DISPATCH1 < distributed_control, F, T0 >; } }; } template < typename F, typename T0 > class remote_call_issue1 { public: static void exec (dc_send * sender, unsigned char flags, procid_t target, F remote_function, const T0 & i0) { oarchive *ptr = get_thread_local_buffer (target); oarchive & arc = *ptr; if (reinterpret_cast < size_t > (remote_function) == reinterpret_cast < size_t > (request_reply_handler)) { flags |= REPLY_PACKET; } size_t len = dc_send::write_packet_header (arc, _get_procid (), flags, _get_sequentialization_key ()); uint32_t beginoff = arc.off; dispatch_type d = function_call_issue_detail::dispatch_selector1 < typename is_rpc_call < F >::type, F, T0 >::dispatchfn (); arc << reinterpret_cast < size_t > (d); arc << reinterpret_cast < size_t > 
(remote_function); arc << i0;
*(reinterpret_cast < uint32_t * >(arc.buf + len)) = arc.off - beginoff;
release_thread_local_buffer (target, flags & CONTROL_PACKET);
}
};
\endcode

The basic idea of the code is straightforward. The receiving end cannot call
the target function (remote_function) directly, since it has no means of
understanding how to deserialize or to construct the stack for the
remote_function. So instead, we generate a "dispatch" function on the
receiving side. The dispatch function is constructed according to the type
information of the remote_function, and therefore knows how to deserialize
the data, and issue the function call. That is the "dispatch_type".

However, since we defined two families of receiving functions: a
non-intrusive version which does not take (dc, procid) as an argument and an
intrusive version which does, the dispatch function must therefore be
slightly different for each of them. That is what the dispatch_selector
class performs. The first template argument of the dispatch_selector family
of classes is a boolean flag which denotes whether the function is a
non-intrusive call or not. This boolean flag itself is determined using the
is_rpc_call<F>::type template.

NOTE(review): the example above shows a REPLY_PACKET check against
request_reply_handler which the generator below does NOT emit; the example
appears to have been generated from a different revision of the macro.
Verify against REMOTE_CALL_ISSUE_GENERATOR before relying on it.
*/

// Helper macros for the generator below:
//   GENARGS expands to one formal parameter "const TN &iN"
//   GENI    expands to one argument name "iN"
//   GENT    expands to one type name "TN"
//   GENARC  serializes one argument into the archive "arc"
#define GENARGS(Z,N,_)  BOOST_PP_CAT(const T, N) BOOST_PP_CAT(&i, N)
#define GENI(Z,N,_) BOOST_PP_CAT(i, N)
#define GENT(Z,N,_) BOOST_PP_CAT(T, N)
#define GENARC(Z,N,_) arc << BOOST_PP_CAT(i, N);

/**
The dispatch_selectorN structs are used to pick between the standard
dispatcher and the nonintrusive dispatch by checking if the function is a
RPC style call or not.

Note: the generated exec() does not use its "sender" parameter; the message
is written directly into the target's thread-local send buffer.
*/
#define REMOTE_CALL_ISSUE_GENERATOR(Z,N,FNAME_AND_CALL) \
namespace function_call_issue_detail {    \
template <typename BoolType, typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
struct BOOST_PP_CAT(dispatch_selector, N){ \
  static dispatch_type dispatchfn() { return BOOST_PP_CAT(dc_impl::NONINTRUSIVE_DISPATCH,N)<distributed_control,F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N, GENT ,_) >; } \
};\
template <typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
struct BOOST_PP_CAT(dispatch_selector, N)<boost::mpl::bool_<true>, F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T)>{ \
  static dispatch_type dispatchfn() { return BOOST_PP_CAT(dc_impl::DISPATCH,N)<distributed_control,F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N, GENT ,_) >; } \
}; \
} \
template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \
class BOOST_PP_CAT(FNAME_AND_CALL, N) { \
  public: \
  static void exec(dc_send* sender, unsigned char flags, procid_t target, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \
    oarchive* ptr = get_thread_local_buffer(target); \
    oarchive& arc = *ptr; \
    size_t len = dc_send::write_packet_header(arc, _get_procid(), flags, _get_sequentialization_key()); \
    uint32_t beginoff = arc.off; \
    dispatch_type d = BOOST_PP_CAT(function_call_issue_detail::dispatch_selector,N)<typename is_rpc_call<F>::type, F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T) >::dispatchfn(); \
    arc << reinterpret_cast<size_t>(d); \
    arc << reinterpret_cast<size_t>(remote_function); \
    BOOST_PP_REPEAT(N, GENARC, _) \
    *(reinterpret_cast<uint32_t*>(arc.buf + len)) = arc.off - beginoff; \
    release_thread_local_buffer(target, flags & CONTROL_PACKET); \
    if (flags & FLUSH_PACKET) pull_flush_soon_thread_local_buffer(target); \
  }\
};


/**
Generates a function call issue. 3rd argument is the issue name
*/
BOOST_PP_REPEAT(6, REMOTE_CALL_ISSUE_GENERATOR, remote_call_issue )


#undef GENARC
#undef GENT
#undef GENI
#undef GENARGS
#undef REMOTE_CALL_ISSUE_GENERATOR

} // namespace dc_impl
} // namespace graphlab

#include <graphlab/rpc/function_arg_types_undef.hpp>

#endif


================================================
FILE: src/graphlab/rpc/function_ret_type.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef FUNCTION_RETURN_TYPE_HPP
#define FUNCTION_RETURN_TYPE_HPP
#include <boost/preprocessor.hpp>
#include <graphlab/rpc/function_arg_types_def.hpp>

namespace graphlab {
namespace dc_impl {

/**
\ingroup rpc
\internal
This struct performs two duties. Firstly, it provides a consistent interface
through a function called ::fcallN<F> to complete a function call with a
variable number of arguments. Next, it provides the type of the return value
of the function in ::type. If the return type is void, it is promoted to a
size_t. This makes the output type of the function call be always
serializable, simplifying the implementation of "requests".
*/ template <typename RetType> struct function_ret_type { typedef RetType type; #define GENARGS(Z,N,_) BOOST_PP_CAT(__GLRPC_R, N) BOOST_PP_CAT(i, N) #define FCALL(Z, N, _) \ template <typename F> \ static RetType BOOST_PP_CAT(fcall, N)(F f BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N, GENARGS, _)){ \ return f(BOOST_PP_ENUM_PARAMS(N, i)); \ } BOOST_PP_REPEAT(8, FCALL , _ ) #undef FCALL #undef GENARGS }; template <> struct function_ret_type<void> { typedef size_t type; #define GENARGS(Z,N,_) BOOST_PP_CAT(__GLRPC_R, N) BOOST_PP_CAT(i, N) #define FCALL(Z, N, _) \ template <typename F> \ static size_t BOOST_PP_CAT(fcall, N)(F f BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N, GENARGS, _)){ \ f(BOOST_PP_ENUM_PARAMS(N, i)); \ return 0; \ } BOOST_PP_REPEAT(8, FCALL , _ ) #undef FCALL #undef GENARGS }; #include <graphlab/rpc/function_arg_types_undef.hpp> } // namespace dc_impl } // namespace graphlab #include <graphlab/rpc/mem_function_arg_types_def.hpp> namespace graphlab { namespace dc_impl { /** This struct performs two duties. Firstly, it provides a consistent interface through a function called ::fcallN<F> to complete a \b member function call with a variable number of arguments. Next, it provides the type of the return value of the function in ::type. If the return type is void, it is promoted to an int. This makes the output type of the function call be always serializable, simplifying the implementation of "requests". 
*/ template <typename RetType> struct mem_function_ret_type { typedef RetType type; #define GENARGS(Z,N,_) BOOST_PP_CAT(__GLRPC_R, N) BOOST_PP_CAT(i, N) #define FCALL(Z, N, _) \ template <typename F, typename T> \ static RetType BOOST_PP_CAT(fcall, N)(F f , T t BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N, GENARGS, _)){ \ return (t->*f)(BOOST_PP_ENUM_PARAMS(N, i)); \ } BOOST_PP_REPEAT(8, FCALL , _ ) #undef FCALL #undef GENARGS }; template <> struct mem_function_ret_type<void> { typedef size_t type; #define GENARGS(Z,N,_) BOOST_PP_CAT(__GLRPC_R, N) BOOST_PP_CAT(i, N) #define FCALL(Z, N, _) \ template <typename F, typename T> \ static size_t BOOST_PP_CAT(fcall, N)(F f , T t BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N, GENARGS, _)){ \ (t->*f)(BOOST_PP_ENUM_PARAMS(N, i)); \ return 0; \ } BOOST_PP_REPEAT(8, FCALL , _ ) #undef FCALL #undef GENARGS }; } // namespace dc_impl } // namespace graphlab #include <graphlab/rpc/mem_function_arg_types_undef.hpp> #endif ================================================ FILE: src/graphlab/rpc/get_current_process_hash.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <graphlab/rpc/get_current_process_hash.hpp> #include <graphlab/ui/mongoose/mongoose.h> #ifdef __APPLE__ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <errno.h> #include <libproc.h> #include <unistd.h> #endif namespace graphlab { namespace dc_impl { #ifdef __linux std::string get_current_process_hash() { char buf[33]; mg_md5_file(buf, "/proc/self/exe"); buf[32] = '\0'; std::string ret = buf; if (ret.length() != 32) { ret = std::string(32, '0'); } return ret; } #elif __APPLE__ std::string get_current_process_hash() { std::string ret; pid_t pid = getpid(); char pathbuf[PROC_PIDPATHINFO_MAXSIZE]; int pidsuccess = proc_pidpath (pid, pathbuf, sizeof(pathbuf)); if (pidsuccess > 0) { char buf[33]; mg_md5_file(buf, pathbuf); buf[32] = '\0'; ret = buf; } if (ret.length() != 32) { ret = std::string(32, '0'); } return ret; } #endif } // dc_impl } // graphlab ================================================ FILE: src/graphlab/rpc/get_current_process_hash.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_RPC_GET_CURRENT_PROCESS_HASH_HPP #define GRAPHLAB_RPC_GET_CURRENT_PROCESS_HASH_HPP #include <string> namespace graphlab { namespace dc_impl { std::string get_current_process_hash(); } // dc_impl } // graphlab #endif ================================================ FILE: src/graphlab/rpc/is_rpc_call.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef IS_RPC_CALL_HPP #define IS_RPC_CALL_HPP #include <boost/type_traits/remove_pointer.hpp> #include <boost/type_traits/remove_const.hpp> #include <boost/type_traits/function_traits.hpp> #include <boost/type_traits/is_same.hpp> #include <boost/mpl/if.hpp> #include <boost/mpl/and.hpp> #include <boost/mpl/bool.hpp> #include <boost/mpl/less.hpp> #include <boost/mpl/comparison.hpp> #include <boost/mpl/int.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/function_arg_types_def.hpp> namespace graphlab { class distributed_control; namespace dc_impl { namespace is_rpc_call_detail { /** \ingroup rpc \internal Whether the function has less than or equal to 2 arguments */ template <typename F> struct less_than_2_args { typedef typename boost::mpl::bool_<__GLRPC_FARITY < 2 >::type type; }; /** \ingroup rpc \internal Now, arg1_type and arg_2 type may not exist in function_traits if the number of arguments is < 2. I will need to wrap it to make it safe */ template <typename F, size_t nargs> struct get_args{ typedef __GLRPC_NIF0 arg1_type; typedef __GLRPC_NIF1 arg2_type; }; // if 0 args. then make both void template <typename F> struct get_args<F, 0>{ typedef void arg1_type; typedef void arg2_type; }; // if 1 arg then make just make arg2 void template <typename F> struct get_args<F, 1>{ typedef __GLRPC_NIF0 arg1_type; typedef void arg2_type; }; template <typename F> struct check_first_arg { typedef typename boost::is_same<typename get_args<F,__GLRPC_FARITY>::arg1_type, distributed_control>::type type; }; template <typename F> struct check_second_arg { typedef typename boost::is_integral<typename get_args<F,__GLRPC_FARITY>::arg2_type>::type type; }; } /** * \ingroup rpc * \internal * ::type is true if F is an RPC call interface. 
* \tparam F the function to test */ template <typename F> struct is_rpc_call { typedef typename boost::mpl::if_< typename is_rpc_call_detail::less_than_2_args<F>::type, boost::false_type, typename boost::mpl::and_< typename is_rpc_call_detail::check_first_arg<F>::type, typename is_rpc_call_detail::check_second_arg<F>::type>::type >::type type; }; // Varargs are all none RPC calls #define BLOCK_VAR_ARGS(Z,N,_) \ template <typename RetType BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \ struct is_rpc_call<RetType (BOOST_PP_ENUM_PARAMS(N, T) BOOST_PP_COMMA_IF(N) ...)> { \ typedef boost::false_type type; \ }; \ \ template <typename RetType BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \ struct is_rpc_call<RetType (*)(BOOST_PP_ENUM_PARAMS(N, T) BOOST_PP_COMMA_IF(N) ...)> { \ typedef boost::false_type type; \ }; BOOST_PP_REPEAT(6, BLOCK_VAR_ARGS, _) #undef BLOCK_VAR_ARGS #define GEN_GET_USER_ARG(Z,N,_) \ template <typename F, typename BoolType> \ struct BOOST_PP_CAT(get_cleaned_rpc_or_basic_arg, N) { \ typedef BOOST_PP_CAT(__GLRPC_NIF, N) arg_type; \ }; \ template <typename F> \ struct BOOST_PP_CAT(get_cleaned_rpc_or_basic_arg, N) <F, boost::mpl::bool_<true> > { \ typedef BOOST_PP_CAT(__GLRPC_F, N) arg_type; \ }; \ template <typename F> \ struct BOOST_PP_CAT(get_cleaned_user_arg, N) { \ typedef typename BOOST_PP_CAT(get_cleaned_rpc_or_basic_arg, N)<F,typename is_rpc_call<F>::type>::arg_type arg_type; \ }; BOOST_PP_REPEAT(6, GEN_GET_USER_ARG, _) #undef GEN_GET_USER_ARG } // namespace dc_impl } // namespace graphlab #include <graphlab/rpc/function_arg_types_undef.hpp> #endif ================================================ FILE: src/graphlab/rpc/lazy_dht.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /* \author Yucheng Low (ylow) An implementation of a distributed integer -> integer map with caching capabilities. */ #ifndef GRAPHLAB_LAZY_DHT_HPP #define GRAPHLAB_LAZY_DHT_HPP #include <boost/unordered_map.hpp> #include <boost/intrusive/list.hpp> #include <graphlab/rpc/dc.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/util/synchronized_unordered_map.hpp> #include <graphlab/util/dense_bitset.hpp> namespace graphlab { /** \internal \ingroup rpc This implements a distributed key -> value map with caching capabilities. It is up to the user to determine cache invalidation policies. User explicitly calls the invalidate() function to clear local cache entries. This is an extremely lazy DHT in that it is up to the user to guarantee that the keys are unique. Any machine can call set on any key, and the result of the key will be stored locally. Reads on any unknown keys will be resolved using a broadcast operation. 
*/ template<typename KeyType, typename ValueType> class lazy_dht{ public: typedef dc_impl::lru_list<KeyType, ValueType> lru_entry_type; /// datatype of the data map typedef boost::unordered_map<KeyType, ValueType> map_type; /// datatype of the local cache map typedef boost::unordered_map<KeyType, lru_entry_type* > cache_type; struct wait_struct { mutex mut; conditional cond; ValueType val; size_t numreplies; bool hasvalue; }; typedef boost::intrusive::member_hook<lru_entry_type, typename lru_entry_type::lru_member_hook_type, &lru_entry_type::member_hook_> MemberOption; /// datatype of the intrusive LRU list embedded in the cache map typedef boost::intrusive::list<lru_entry_type, MemberOption, boost::intrusive::constant_time_size<false> > lru_list_type; /// Constructor. Creates the integer map. lazy_dht(distributed_control &dc, size_t max_cache_size = 65536):rmi(dc, this),data(11) { cache.rehash(max_cache_size); maxcache = max_cache_size; logger(LOG_INFO, "%d Creating distributed_hash_table. 
Cache Limit = %d", dc.procid(), maxcache); reqs = 0; misses = 0; dc.barrier(); } ~lazy_dht() { data.clear(); typename cache_type::iterator i = cache.begin(); while (i != cache.end()) { delete i->second; ++i; } cache.clear(); } /// Sets the key to the value void set(const KeyType& key, const ValueType &newval) { datalock.lock(); data[key] = newval; datalock.unlock(); } std::pair<bool, ValueType> get_owned(const KeyType &key) const { std::pair<bool, ValueType> ret; datalock.lock(); typename map_type::const_iterator iter = data.find(key); if (iter == data.end()) { ret.first = false; } else { ret.first = true; ret.second = iter->second; } datalock.unlock(); return ret; } void remote_get_owned(const KeyType &key, procid_t source, size_t ptr) const { std::pair<bool, ValueType> ret; datalock.lock(); typename map_type::const_iterator iter = data.find(key); if (iter == data.end()) { ret.first = false; } else { ret.first = true; ret.second = iter->second; } datalock.unlock(); rmi.remote_call(source, &lazy_dht<KeyType,ValueType>::get_reply, ptr, ret.second, ret.first); } void get_reply(size_t ptr, ValueType& val, bool hasvalue) { wait_struct* w = reinterpret_cast<wait_struct*>(ptr); w->mut.lock(); if (hasvalue) { w->val = val; w->hasvalue = true; } w->numreplies--; if (w->numreplies == 0) w->cond.signal(); w->mut.unlock(); } /** Gets the value associated with the key. returns true on success.. 
*/ std::pair<bool, ValueType> get(const KeyType &key) const { std::pair<bool, ValueType> ret = get_owned(key); if (ret.first) return ret; wait_struct w; w.numreplies = rmi.numprocs() - 1; size_t ptr = reinterpret_cast<size_t>(&w); // otherwise I need to find someone with the key for (size_t i = 0;i < rmi.numprocs(); ++i) { if (i != rmi.procid()) { rmi.remote_call(i, &lazy_dht<KeyType,ValueType>::remote_get_owned, key, rmi.procid(), ptr); } } w.mut.lock(); while (w.numreplies > 0) w.cond.wait(w.mut); w.mut.unlock(); ret.first = w.hasvalue; ret.second = w.val; if (ret.first) update_cache(key, ret.second); return ret; } /** Gets the value associated with the key, reading from cache if available Note that the cache may be out of date. */ std::pair<bool, ValueType> get_cached(const KeyType &key) const { std::pair<bool, ValueType> ret = get_owned(key); if (ret.first) return ret; reqs++; cachelock.lock(); // check if it is in the cache typename cache_type::iterator i = cache.find(key); if (i == cache.end()) { // nope. not in cache. Call the regular get cachelock.unlock(); misses++; return get(key); } else { // yup. in cache. return the value ret.first = true; ret.second = i->second->value; // shift the cache entry to the head of the LRU list lruage.erase(lru_list_type::s_iterator_to(*(i->second))); lruage.push_front(*(i->second)); cachelock.unlock(); return ret; } } /// Invalidates the cache entry associated with this key void invalidate(const KeyType &key) const{ cachelock.lock(); // is the key I am invalidating in the cache? 
typename cache_type::iterator i = cache.find(key); if (i != cache.end()) { // drop it from the lru list delete i->second; cache.erase(i); } cachelock.unlock(); } double cache_miss_rate() { return double(misses) / double(reqs); } size_t num_gets() const { return reqs; } size_t num_misses() const { return misses; } size_t cache_size() const { return cache.size(); } private: mutable dc_dist_object<lazy_dht<KeyType, ValueType> > rmi; mutex datalock; map_type data; /// The actual table data that is distributed mutex cachelock; /// lock for the cache datastructures mutable cache_type cache; /// The cache table mutable lru_list_type lruage; /// THe LRU linked list associated with the cache procid_t numprocs; /// NUmber of processors size_t maxcache; /// Maximum cache size allowed mutable size_t reqs; mutable size_t misses; /// Updates the cache with this new value void update_cache(const KeyType &key, const ValueType &val) const{ cachelock.lock(); typename cache_type::iterator i = cache.find(key); // create a new entry if (i == cache.end()) { cachelock.unlock(); // if we are out of room, remove the lru entry if (cache.size() >= maxcache) remove_lru(); cachelock.lock(); // insert the element, remember the iterator so we can push it // straight to the LRU list std::pair<typename cache_type::iterator, bool> ret = cache.insert(std::make_pair(key, new lru_entry_type(key, val))); if (ret.second) lruage.push_front(*(ret.first->second)); } else { // modify entry in place i->second->value = val; // swap to front of list //boost::swap_nodes(lru_list_type::s_iterator_to(i->second), lruage.begin()); lruage.erase(lru_list_type::s_iterator_to(*(i->second))); lruage.push_front(*(i->second)); } cachelock.unlock(); } /// Removes the least recently used element from the cache void remove_lru() const{ cachelock.lock(); KeyType keytoerase = lruage.back().key; // is the key I am invalidating in the cache? 
typename cache_type::iterator i = cache.find(keytoerase); if (i != cache.end()) { // drop it from the lru list delete i->second; cache.erase(i); } cachelock.unlock(); } }; } #endif ================================================ FILE: src/graphlab/rpc/mem_function_arg_types_def.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ // check for multiple inclusions #ifdef __GLRPC_F0 #error "multiple includes of function arg types" #endif #include <boost/type_traits/decay.hpp> #include <graphlab/util/generics/remove_member_pointer.hpp> #include <boost/type_traits/remove_const.hpp> #include <boost/type_traits/function_traits.hpp> #include <boost/function.hpp> // This is the member function version of function_arg_types /* A huge collection of useful typedefs. F0... F5: identifies the arguments for an RPC aware function F. Dropping const and dropping references (therefore allowing you to use F0....F5 to do casting. __GLRPC_NIF0... __GLRPC_NIF5: identifies the arguments for an RPC unaware function F __GLRPC_R0.... 
__GLRPC_R7: Identifies the actual arguments of the function F, without de-consting and de-reffing __GLRPC_FRESULT: de-const and de-refed type of F's return type __GLRPC_FARITY: the number of arguments F takes */ #define REMOVE_CONST_REF(REF) typename boost::remove_const<typename boost::remove_reference<REF>::type>::type //#define F0 REMOVE_CONST_REF(typename boost::function<typename boost::remove_member_pointer<F>::type>::arg1_type) //#define __GLRPC_FRESULT REMOVE_CONST_REF(typename boost::function<typename boost::remove_member_pointer<F>::type>::result_type) #define __GLRPC_NIF0 REMOVE_CONST_REF(typename boost::function<typename boost::remove_member_pointer<F>::type>::arg1_type) #define __GLRPC_NIF1 REMOVE_CONST_REF(typename boost::function<typename boost::remove_member_pointer<F>::type>::arg2_type) #define __GLRPC_NIF2 REMOVE_CONST_REF(typename boost::function<typename boost::remove_member_pointer<F>::type>::arg3_type) #define __GLRPC_NIF3 REMOVE_CONST_REF(typename boost::function<typename boost::remove_member_pointer<F>::type>::arg4_type) #define __GLRPC_NIF4 REMOVE_CONST_REF(typename boost::function<typename boost::remove_member_pointer<F>::type>::arg5_type) #define __GLRPC_NIF5 REMOVE_CONST_REF(typename boost::function<typename boost::remove_member_pointer<F>::type>::arg6_type) #define __GLRPC_NIF6 REMOVE_CONST_REF(typename boost::function<typename boost::remove_member_pointer<F>::type>::arg7_type) #define __GLRPC_NIF7 REMOVE_CONST_REF(typename boost::function<typename boost::remove_member_pointer<F>::type>::arg8_type) #define __GLRPC_R0 typename boost::function<typename boost::remove_member_pointer<F>::type>::arg1_type #define __GLRPC_R1 typename boost::function<typename boost::remove_member_pointer<F>::type>::arg2_type #define __GLRPC_R2 typename boost::function<typename boost::remove_member_pointer<F>::type>::arg3_type #define __GLRPC_R3 typename boost::function<typename boost::remove_member_pointer<F>::type>::arg4_type #define __GLRPC_R4 typename 
boost::function<typename boost::remove_member_pointer<F>::type>::arg5_type #define __GLRPC_R5 typename boost::function<typename boost::remove_member_pointer<F>::type>::arg6_type #define __GLRPC_R6 typename boost::function<typename boost::remove_member_pointer<F>::type>::arg7_type #define __GLRPC_R7 typename boost::function<typename boost::remove_member_pointer<F>::type>::arg8_type #define __GLRPC_FRESULT REMOVE_CONST_REF(typename boost::function<typename boost::remove_member_pointer<F>::type>::result_type) #define __GLRPC_FARITY boost::function<typename boost::remove_member_pointer<F>::type>::arity ================================================ FILE: src/graphlab/rpc/mem_function_arg_types_undef.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #undef __GLRPC_NIF0 #undef __GLRPC_NIF1 #undef __GLRPC_NIF2 #undef __GLRPC_NIF3 #undef __GLRPC_NIF4 #undef __GLRPC_NIF5 #undef __GLRPC_NIF6 #undef __GLRPC_NIF7 #undef __GLRPC_R0 #undef __GLRPC_R1 #undef __GLRPC_R2 #undef __GLRPC_R3 #undef __GLRPC_R4 #undef __GLRPC_R5 #undef __GLRPC_R6 #undef __GLRPC_R7 #undef __GLRPC_FRESULT #undef REMOVE_CONST_REF #undef __GLRPC_FARITY ================================================ FILE: src/graphlab/rpc/object_broadcast_issue.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef OBJECT_BROADCAST_ISSUE_HPP #define OBJECT_BROADCAST_ISSUE_HPP #include <iostream> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/dc_send.hpp> #include <graphlab/rpc/object_call_dispatch.hpp> #include <graphlab/rpc/object_call_issue.hpp> #include <graphlab/rpc/is_rpc_call.hpp> #include <graphlab/rpc/dc_thread_get_send_buffer.hpp> #include <boost/preprocessor.hpp> #include <graphlab/rpc/mem_function_arg_types_def.hpp> namespace graphlab{ namespace dc_impl { /** \ingroup rpc \internal \file object_broadcast_issue.hpp This is an internal function and should not be used directly See object_call_issue.hpp for details. This is equivalent to the macro expansion in object_call_issue with the difference that this takes an iterator sequence listing the machines to send to. The code below generates the following for different number of arguments. Here, we demonstrate the 1 argument version. 
\code template < typename Iterator, typename T, typename F, typename T0 > class object_broadcast_issue1 { public: static void exec (dc_dist_object_base * rmi, std::vector < dc_send * >sender, unsigned char flags, Iterator target_begin, Iterator target_end, size_t objid, F remote_function, const T0 & i0) { oarchive arc; arc.buf = (char *) malloc (65536); arc.len = 65536; size_t len = dc_send::write_packet_header (arc, _get_procid (), flags, _get_sequentialization_key ()); uint32_t beginoff = arc.off; dispatch_type d = dc_impl::OBJECT_NONINTRUSIVE_DISPATCH1 < distributed_control, T, F, T0 >; arc << reinterpret_cast < size_t > (d); serialize (arc, (char *) (&remote_function), sizeof (F)); arc << objid; arc << i0; uint32_t curlen = arc.off - beginoff; *(reinterpret_cast < uint32_t * >(arc.buf + len)) = curlen; Iterator iter = target_begin; while (iter != target_end) { oarchive *buf = get_thread_local_buffer (*iter); buf->write (arc.buf, arc.off); release_thread_local_buffer (*iter, flags & CONTROL_PACKET); if ((flags & CONTROL_PACKET) == 0) { rmi->inc_bytes_sent ((*iter), curlen); } ++iter; } free (arc.buf); } }; \endcode */ #define GENARGS(Z,N,_) BOOST_PP_CAT(const T, N) BOOST_PP_CAT(&i, N) #define GENI(Z,N,_) BOOST_PP_CAT(i, N) #define GENT(Z,N,_) BOOST_PP_CAT(T, N) #define GENARC(Z,N,_) arc << BOOST_PP_CAT(i, N); #define REMOTE_BROADCAST_ISSUE_GENERATOR(Z,N,FNAME_AND_CALL) \ template<typename Iterator, typename T, typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \ class BOOST_PP_CAT(BOOST_PP_TUPLE_ELEM(2,0,FNAME_AND_CALL), N) { \ public: \ static void exec(dc_dist_object_base* rmi, std::vector<dc_send*> sender, unsigned char flags, \ Iterator target_begin, Iterator target_end, size_t objid, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \ oarchive arc; \ arc.buf = (char*)malloc(INITIAL_BUFFER_SIZE); \ arc.len = INITIAL_BUFFER_SIZE; \ size_t len = dc_send::write_packet_header(arc, _get_procid(), flags, 
_get_sequentialization_key()); \ uint32_t beginoff = arc.off; \ dispatch_type d = BOOST_PP_CAT(dc_impl::OBJECT_NONINTRUSIVE_DISPATCH,N)<distributed_control,T,F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N, GENT ,_) >; \ arc << reinterpret_cast<size_t>(d); \ serialize(arc, (char*)(&remote_function), sizeof(F)); \ arc << objid; \ BOOST_PP_REPEAT(N, GENARC, _) \ uint32_t curlen = arc.off - beginoff; \ *(reinterpret_cast<uint32_t*>(arc.buf + len)) = curlen; \ Iterator iter = target_begin; \ while(iter != target_end) { \ oarchive* buf = get_thread_local_buffer(*iter); \ buf->write(arc.buf, arc.off); \ release_thread_local_buffer(*iter, flags & CONTROL_PACKET); \ if ((flags & CONTROL_PACKET) == 0) { \ rmi->inc_bytes_sent((*iter), curlen); \ } \ ++iter; \ } \ free(arc.buf); \ if (flags & FLUSH_PACKET) pull_flush_soon_thread_local_buffer(); \ } \ }; /** Generates a function call issue. 3rd argument is a tuple (issue name, dispacther name) */ BOOST_PP_REPEAT(7, REMOTE_BROADCAST_ISSUE_GENERATOR, (object_broadcast_issue, _) ) #undef GENARC #undef GENT #undef GENI #undef GENARGS #undef REMOTE_BROADCAST_ISSUE_GENERATOR } // namespace dc_impl } // namespace graphlab #include <graphlab/rpc/mem_function_arg_types_undef.hpp> #endif ================================================ FILE: src/graphlab/rpc/object_call_dispatch.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_OBJECT_CALL_DISPATCH_HPP #define GRAPHLAB_OBJECT_CALL_DISPATCH_HPP #include <iostream> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/mem_function_arg_types_def.hpp> #include <boost/preprocessor.hpp> namespace graphlab { namespace dc_impl { /** \ingroup rpc \internal \file object_call_dispatch.hpp This is an internal function and should not be used directly This is similar to a regular function call in function_call_dispatch.hpp with the only difference that it needs to locate the object using dc.get_registered_object(...) After the function call, it also needs to increment the call count for the object context. \code template<typename DcType, typename T, typename F , typename T0 > void OBJECT_NONINTRUSIVE_DISPATCH1(DcType& dc, procid_t source, unsigned char packet_type_mask, const char* buf, size_t len){ iarchive iarc(buf, len); F f; deserialize(iarc, (char*)(&f), sizeof(F)); size_t objid; iarc >> objid; T* obj = reinterpret_cast<T*>(dc.get_registered_object(objid)); T0 (f0) ; iarc >> (f0) ; (obj->*f)( (f0) ); charstring_free(f0); if ((packet_type_mask & CONTROL_PACKET) == 0) dc.get_rmi_instance(objid)->inc_calls_received(source); } \endcode */ #define GENFN(N) BOOST_PP_CAT(__GLRPC_NIF, N) #define GENFN2(N) BOOST_PP_CAT(f, N) #define GENARGS(Z,N,_) (BOOST_PP_CAT(f, N)) /** * This macro defines and deserializes each of the parameters to the * function. 
*/ #define GENPARAMS(Z,N,_) \ BOOST_PP_CAT(T, N) (BOOST_PP_CAT(f, N)) ; \ iarc >> (BOOST_PP_CAT(f, N)) ; #define CHARSTRINGFREE(Z,N,_) charstring_free(BOOST_PP_CAT(f, N)); #define OBJECT_NONINTRUSIVE_DISPATCH_GENERATOR(Z,N,_) \ template<typename DcType, typename T, \ typename F BOOST_PP_COMMA_IF(N) \ BOOST_PP_ENUM_PARAMS(N, typename T) > \ void BOOST_PP_CAT(OBJECT_NONINTRUSIVE_DISPATCH,N)(DcType& dc, \ procid_t source, \ unsigned char packet_type_mask, \ const char* buf, size_t len){ \ iarchive iarc(buf, len); \ F f; \ deserialize(iarc, (char*)(&f), sizeof(F)); \ size_t objid; \ iarc >> objid; \ T* obj = reinterpret_cast<T*>(dc.get_registered_object(objid)); \ /* Deserialize the arguments to f */ \ BOOST_PP_REPEAT(N, GENPARAMS, _); \ /* Invoke f */ \ (obj->*f)(BOOST_PP_ENUM(N,GENARGS ,_) ); \ /* Free the buffers for the args */ \ BOOST_PP_REPEAT(N, CHARSTRINGFREE, _) ; \ /* Count the call if not a control call */ \ if ((packet_type_mask & CONTROL_PACKET) == 0) \ dc.get_rmi_instance(objid)->inc_calls_received(source); \ } /** * This macro generates dispatch functions for functions for rpc calls * with up to 6 arguments. * * Remarks: If the compiler generates the following error "Too * few/many arguments to function" at this point is is due to the * caller not providing the correct number fo arguments in the RPC * call. Note that default arguments are NOT supported in rpc calls * and so all arguments must be provided. * */ BOOST_PP_REPEAT(7, OBJECT_NONINTRUSIVE_DISPATCH_GENERATOR, _) #undef GENFN #undef GENFN2 #undef GENARGS #undef GENPARAMS #undef NONINTRUSIVE_DISPATCH_GENERATOR } // namespace dc_impl } // namespace graphlab #include <graphlab/rpc/mem_function_arg_types_undef.hpp> #endif ================================================ FILE: src/graphlab/rpc/object_call_issue.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef OBJECT_CALL_ISSUE_HPP #define OBJECT_CALL_ISSUE_HPP #include <iostream> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/dc_send.hpp> #include <graphlab/rpc/object_call_dispatch.hpp> #include <graphlab/rpc/is_rpc_call.hpp> #include <graphlab/rpc/dc_thread_get_send_buffer.hpp> #include <boost/preprocessor.hpp> #include <graphlab/rpc/dc_compile_parameters.hpp> #include <graphlab/util/generics/blob.hpp> #include <graphlab/rpc/mem_function_arg_types_def.hpp> namespace graphlab{ namespace dc_impl { /** \ingroup rpc \internal \file object_call_issue.hpp This is an internal function and should not be used directly Marshalls a object function call to a remote machine. This is similar to the regular function call in function_call_issue.hpp with the only difference that the object id needs to be transmitted as well. 
\code template < typename T, typename F, typename T0 > class object_call_issue1 { public: static void exec (dc_dist_object_base * rmi, dc_send * sender, unsigned char flags, procid_t target, size_t objid, F remote_function, const T0 & i0) { oarchive *ptr = get_thread_local_buffer (target); oarchive & arc = *ptr; size_t len = dc_send::write_packet_header (arc, _get_procid (), flags, _get_sequentialization_key ()); uint32_t beginoff = arc.off; dispatch_type d = dc_impl::OBJECT_NONINTRUSIVE_DISPATCH1 < distributed_control, T, F, T0 >; arc << reinterpret_cast < size_t > (d); serialize (arc, (char *) (&remote_function), sizeof (F)); arc << objid; arc << i0; uint32_t curlen = arc.off - beginoff; *(reinterpret_cast < uint32_t * >(arc.buf + len)) = curlen; release_thread_local_buffer (target, flags & CONTROL_PACKET); if ((flags & CONTROL_PACKET) == 0) { rmi->inc_bytes_sent (target, curlen); } } }; \endcode */ #define GENARGS(Z,N,_) BOOST_PP_CAT(const T, N) BOOST_PP_CAT(&i, N) #define GENI(Z,N,_) BOOST_PP_CAT(i, N) #define GENT(Z,N,_) BOOST_PP_CAT(T, N) #define GENARC(Z,N,_) arc << BOOST_PP_CAT(i, N); /** The dispatch_selectorN structs are used to pick between the standard dispatcher and the nonintrusive dispatch by checking if the function is a RPC style call or not. 
*/ #define REMOTE_CALL_ISSUE_GENERATOR(Z,N,FNAME_AND_CALL) \ template<typename T, typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \ class BOOST_PP_CAT(BOOST_PP_TUPLE_ELEM(2,0,FNAME_AND_CALL), N) { \ public: \ static void exec(dc_dist_object_base* rmi, dc_send* sender, unsigned char flags, procid_t target, size_t objid, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \ oarchive* ptr = get_thread_local_buffer(target); \ oarchive& arc = *ptr; \ size_t len = dc_send::write_packet_header(arc, _get_procid(), flags, _get_sequentialization_key()); \ uint32_t beginoff = arc.off; \ dispatch_type d = BOOST_PP_CAT(dc_impl::OBJECT_NONINTRUSIVE_DISPATCH,N)<distributed_control,T,F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N, GENT ,_) >; \ arc << reinterpret_cast<size_t>(d); \ serialize(arc, (char*)(&remote_function), sizeof(F)); \ arc << objid; \ BOOST_PP_REPEAT(N, GENARC, _) \ uint32_t curlen = arc.off - beginoff; \ *(reinterpret_cast<uint32_t*>(arc.buf + len)) = curlen; \ release_thread_local_buffer(target, flags & CONTROL_PACKET); \ if ((flags & CONTROL_PACKET) == 0) { \ rmi->inc_bytes_sent(target, curlen); \ } \ if (flags & FLUSH_PACKET) pull_flush_soon_thread_local_buffer(target); \ } \ \ }; /** * \ingroup rpc * \internal * * This generates a "split call". Where the header of the call message * is written to with split_call_begin, and the message actually sent with * split_call_end(). It is then up to the user to serialize the message arguments * into the oarchive returned. The split call can provide performance gains * when the contents of the message are large, since this allows the user to * control the serialization process. 
For examples, see
 * \ref dc_dist_object::split_call_begin
 */
template <typename T, typename F>
class object_split_call {
 public:
  /**
   * Begins a split call: allocates a fresh archive, writes the dispatch
   * function pointer, the serialized member-function pointer and the
   * object id, then reserves a slot for a trailing blob-size argument.
   * The caller serializes the message payload into the returned archive
   * and finishes with split_call_end() (or abandons it with
   * split_call_cancel()).
   *
   * Note: space for the packet_hdr is skipped (arc.advance) but the
   * header itself is only written by split_call_end().  The first size_t
   * of that reserved header region is temporarily used to stash the
   * offset of the blob-size slot, so split_call_end() can find and patch
   * it before overwriting the region with the real packet header.
   */
  static oarchive* split_call_begin(dc_dist_object_base* rmi, size_t objid, F remote_function) {
    oarchive* ptr = new oarchive;
    oarchive& arc = *ptr;
    arc.buf = (char*)malloc(INITIAL_BUFFER_SIZE);
    arc.len = INITIAL_BUFFER_SIZE;
    // leave a gap for the packet header; filled in by split_call_end()
    arc.advance(sizeof(packet_hdr));
    // receiver dispatches via OBJECT_NONINTRUSIVE_DISPATCH2 with a
    // (size_t blob_length, wild_pointer blob) argument pair
    dispatch_type d = dc_impl::OBJECT_NONINTRUSIVE_DISPATCH2<distributed_control,T,F,size_t, wild_pointer>;
    arc << reinterpret_cast<size_t>(d);
    serialize(arc, (char*)(&remote_function), sizeof(F));
    arc << objid;
    // make a gap for the blob size argument
    // write the largest possible size_t. That will allow it to bypass
    // dynamic length encoding issues.
    // patch the header with the offset to this point.
    // (arc.off + 1 presumably skips a one-byte length-encoding tag
    // emitted by operator<< for size_t -- TODO confirm against the
    // oarchive integer encoding)
    (*reinterpret_cast<size_t*>(arc.buf)) = arc.off + 1;
    arc << (size_t)(-1);
    return ptr;
  }

  /// Abandons a split call started with split_call_begin(), releasing
  /// both the archive buffer and the archive object itself.
  static void split_call_cancel(oarchive* oarc) {
    free(oarc->buf);
    delete oarc;
  }

  /**
   * \ingroup rpc
   * \internal
   *
   * This sends a message first created with split_call_begin. The archive
   * pointer is consumed.
   */
  static void split_call_end(dc_dist_object_base* rmi, oarchive* oarc, dc_send* sender, procid_t target, unsigned char flags) {
    // header points to the location of the blob size argument
    size_t blobsize_offset = *reinterpret_cast<size_t*>(oarc->buf);
    // patch the real blob length (bytes following the size_t slot) into
    // the slot reserved by split_call_begin()
    (*reinterpret_cast<size_t*>(oarc->buf + blobsize_offset)) = oarc->off - blobsize_offset - sizeof(size_t);
    // write the packet header (overwrites the stashed offset above)
    packet_hdr* hdr = reinterpret_cast<packet_hdr*>(oarc->buf);
    hdr->len = oarc->off - sizeof(packet_hdr);
    hdr->src = _get_procid();
    hdr->packet_type_mask = flags;
    hdr->sequentialization_key = _get_sequentialization_key();
    // copy the length before the buffer is handed off below
    size_t len = hdr->len;
    write_thread_local_buffer(target, oarc->buf, oarc->off, flags & CONTROL_PACKET);
    if ((flags & CONTROL_PACKET) == 0) {
      rmi->inc_bytes_sent(target, len);
    }
    if (flags & FLUSH_PACKET) pull_flush_soon_thread_local_buffer(target);
    delete oarc;
  }
};

/** Generates a function call issue.
3rd argument is a tuple (issue name, dispacther name) */ BOOST_PP_REPEAT(7, REMOTE_CALL_ISSUE_GENERATOR, (object_call_issue, _) ) #undef GENARC #undef GENT #undef GENI #undef GENARGS #undef REMOTE_CALL_ISSUE_GENERATOR } // namespace dc_impl } // namespace graphlab #include <graphlab/rpc/mem_function_arg_types_undef.hpp> #endif ================================================ FILE: src/graphlab/rpc/object_request_dispatch.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef OBJECT_REQUEST_DISPATCH_HPP #define OBJECT_REQUEST_DISPATCH_HPP #include <sstream> #include <iostream> #include <string> #include <functional> #include <algorithm> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/request_reply_handler.hpp> #include <graphlab/rpc/function_ret_type.hpp> #include <boost/bind.hpp> #include <boost/mem_fn.hpp> #include <graphlab/rpc/mem_function_arg_types_def.hpp> #include <boost/preprocessor.hpp> namespace graphlab { namespace dc_impl{ /** \ingroup rpc \internal \file object_request_dispatch.hpp This is an internal function and should not be used directly This is the dispatch function for the an object request. 
This is similar to the standard request dispatcher in request_dispatch.hpp except that the object needs to be located using the object id. After the function call, it also needs to increment the call count for the object context. \code template < typename DcType, typename T, typename F, typename T0 > void OBJECT_NONINTRUSIVE_REQUESTDISPATCH1 (DcType & dc, procid_t source, unsigned char packet_type_mask, const char *buf, size_t len) { iarchive iarc (buf, len); F f; deserialize (iarc, (char *) (&f), sizeof (F)); size_t objid; iarc >> objid; T *obj = reinterpret_cast < T * >(dc.get_registered_object (objid)); size_t id; iarc >> id; T0 (f0); iarc >> (f0); typename function_ret_type < typename boost::remove_const < typename boost::remove_reference < typename boost::function < typename boost::remove_member_pointer < F >::type >::result_type >::type >::type >::type ret = mem_function_ret_type < typename boost::remove_const < typename boost::remove_reference < typename boost::function < typename boost::remove_member_pointer < F >::type >::result_type >::type >::type >::fcall1 (f, obj, (f0)); charstring_free (f0); boost::iostreams::stream < resizing_array_sink > retstrm (128); oarchive oarc (retstrm); oarc << ret; retstrm.flush (); if ((packet_type_mask & CONTROL_PACKET) == 0) { dc.get_rmi_instance (objid)->inc_calls_received (source); dc.get_rmi_instance (objid)->inc_bytes_sent (source, retstrm->len); } if (packet_type_mask & CONTROL_PACKET) { dc.control_call (source, request_reply_handler, id, blob (retstrm->str, retstrm->len)); } else { dc.reply_remote_call (source, request_reply_handler, id, blob (retstrm->str, retstrm->len)); } free (retstrm->str); } \endcode */ #define GENFN(N) BOOST_PP_CAT(__GLRPC_NIF, N) #define GENFN2(N) BOOST_PP_CAT(f, N) #define GENNIARGS(Z,N,_) (BOOST_PP_CAT(f, N)) #define GENPARAMS(Z,N,_) \ BOOST_PP_CAT(T, N) (BOOST_PP_CAT(f, N)) ; \ iarc >> (BOOST_PP_CAT(f, N)) ; #define CHARSTRINGFREE(Z,N,_) charstring_free(BOOST_PP_CAT(f, N)); #define 
NONINTRUSIVE_DISPATCH_GENERATOR(Z,N,_) \ template<typename DcType, typename T, \ typename F BOOST_PP_COMMA_IF(N) \ BOOST_PP_ENUM_PARAMS(N, typename T) > \ void BOOST_PP_CAT(OBJECT_NONINTRUSIVE_REQUESTDISPATCH,N) (DcType& dc, \ procid_t source, \ unsigned char packet_type_mask, \ const char* buf, size_t len) { \ iarchive iarc(buf, len); \ F f; \ deserialize(iarc, (char*)(&f), sizeof(F)); \ size_t objid; \ iarc >> objid; \ T* obj = reinterpret_cast<T*>(dc.get_registered_object(objid)); \ size_t id; iarc >> id; \ BOOST_PP_REPEAT(N, GENPARAMS, _); \ typename function_ret_type<__GLRPC_FRESULT>::type ret = \ mem_function_ret_type<__GLRPC_FRESULT>::BOOST_PP_CAT(fcall, N) \ (f, obj BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENNIARGS ,_)); \ BOOST_PP_REPEAT(N, CHARSTRINGFREE, _); \ boost::iostreams::stream<resizing_array_sink> retstrm(128); \ oarchive oarc(retstrm); \ oarc << ret; \ retstrm.flush(); \ if ((packet_type_mask & CONTROL_PACKET) == 0) { \ dc.get_rmi_instance(objid)->inc_calls_received(source); \ dc.get_rmi_instance(objid)->inc_bytes_sent(source, retstrm->len); \ } \ /*std::cerr << "Request wait on " << id << std::endl ; */ \ if (packet_type_mask & CONTROL_PACKET) { \ dc.control_call(source, \ request_reply_handler, \ id, \ blob(retstrm->str, retstrm->len)); \ } else if(packet_type_mask & FLUSH_PACKET) { \ dc.reply_remote_call(source, \ request_reply_handler, \ id, \ blob(retstrm->str, retstrm->len)); \ } else { \ dc.remote_call(source, \ request_reply_handler, \ id, \ blob(retstrm->str, retstrm->len)); \ } \ free(retstrm->str); \ /* std::cerr << "Request received on " << id << std::endl ; */ \ } BOOST_PP_REPEAT(6, NONINTRUSIVE_DISPATCH_GENERATOR, _) #undef GENFN #undef GENFN2 #undef GENNIARGS #undef GENPARAMS #undef NONINTRUSIVE_DISPATCH_GENERATOR } // namespace dc_impl } // namespace graphlab #include <graphlab/rpc/mem_function_arg_types_undef.hpp> #endif ================================================ FILE: src/graphlab/rpc/object_request_issue.hpp 
================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef OBJECT_REQUEST_ISSUE_HPP #define OBJECT_REQUEST_ISSUE_HPP #include <sstream> #include <iostream> #include <string> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/request_reply_handler.hpp> #include <graphlab/rpc/object_request_dispatch.hpp> #include <graphlab/rpc/function_ret_type.hpp> #include <graphlab/rpc/mem_function_arg_types_def.hpp> #include <graphlab/rpc/request_future.hpp> #include <graphlab/rpc/dc_thread_get_send_buffer.hpp> #include <graphlab/rpc/dc_compile_parameters.hpp> #include <boost/preprocessor.hpp> namespace graphlab { namespace dc_impl { #define GENARGS(Z,N,_) BOOST_PP_CAT(const T, N) BOOST_PP_CAT(&i, N) #define GENT(Z,N,_) BOOST_PP_CAT(T, N) #define GENARC(Z,N,_) arc << BOOST_PP_CAT(i, N); /** \internal \ingroup rpc \file object_request_issue.hpp This is an internal function and should not be used directly This is the marshall function for the an object member function call. 
This is very similar to the standard function request issue in request_issue.hpp , with the only difference that an object id has to be transmitted \code template < typename T, typename F, typename T0 > class object_request_issue1 { public: static void exec (dc_dist_object_base * rmi, dc_send * sender, size_t request_handle, unsigned char flags, procid_t target, size_t objid, F remote_function, const T0 & i0) { oarchive *ptr = get_thread_local_buffer (target); oarchive & arc = *ptr; size_t len = dc_send::write_packet_header (arc, _get_procid (), flags, _get_sequentialization_key ()); uint32_t beginoff = arc.off; dispatch_type d = dc_impl::OBJECT_NONINTRUSIVE_REQUESTDISPATCH1 < distributed_control, T, F, T0 >; arc << reinterpret_cast < size_t > (d); serialize (arc, (char *) (&remote_function), sizeof (remote_function)); arc << objid; arc << request_handle; arc << i0; uint32_t curlen = arc.off - beginoff; *(reinterpret_cast < uint32_t * >(arc.buf + len)) = curlen; release_thread_local_buffer (target, flags & CONTROL_PACKET); if ((flags & CONTROL_PACKET) == 0) rmi->inc_bytes_sent (target, curlen); pull_flush_thread_local_buffer (target); } }; \endcode */ #define REMOTE_REQUEST_ISSUE_GENERATOR(Z,N,FNAME_AND_CALL) \ template<typename T,typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \ class BOOST_PP_CAT(FNAME_AND_CALL, N) { \ public: \ static void exec(dc_dist_object_base* rmi, dc_send* sender, size_t request_handle, unsigned char flags, procid_t target,size_t objid, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \ oarchive* ptr = get_thread_local_buffer(target); \ oarchive& arc = *ptr; \ size_t len = dc_send::write_packet_header(arc, _get_procid(), flags, _get_sequentialization_key()); \ uint32_t beginoff = arc.off; \ dispatch_type d = BOOST_PP_CAT(dc_impl::OBJECT_NONINTRUSIVE_REQUESTDISPATCH,N)<distributed_control,T,F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N, GENT ,_) >; \ arc << reinterpret_cast<size_t>(d); \ serialize(arc, 
(char*)(&remote_function), sizeof(remote_function)); \ arc << objid; \ arc << request_handle; \ BOOST_PP_REPEAT(N, GENARC, _) \ uint32_t curlen = arc.off - beginoff; \ *(reinterpret_cast<uint32_t*>(arc.buf + len)) = curlen; \ release_thread_local_buffer(target, flags & CONTROL_PACKET); \ if ((flags & CONTROL_PACKET) == 0) \ rmi->inc_bytes_sent(target, curlen); \ if (flags & FLUSH_PACKET) pull_flush_soon_thread_local_buffer(target); \ }\ }; BOOST_PP_REPEAT(6, REMOTE_REQUEST_ISSUE_GENERATOR, object_request_issue ) #undef GENARC #undef GENT #undef GENARGS #undef REMOTE_REQUEST_ISSUE_GENERATOR } // namespace dc_impl } // namespace graphlab #include <graphlab/rpc/mem_function_arg_types_undef.hpp> #endif ================================================ FILE: src/graphlab/rpc/pod_template_structs.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef POD_TEMPLATE_STRUCTS_HPP #define POD_TEMPLATE_STRUCTS_HPP #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/serialization/is_pod.hpp> namespace graphlab { namespace dc_impl { namespace pod_template_detail { template <typename F> struct pod_call_struct0 : public IS_POD_TYPE{ size_t dispatch_function; size_t objid; F remote_function; }; template <typename F, typename T0> struct pod_call_struct1 : public IS_POD_TYPE{ size_t dispatch_function; size_t objid; F remote_function; T0 t0; }; template <typename F, typename T0, typename T1> struct pod_call_struct2 : public IS_POD_TYPE{ size_t dispatch_function; size_t objid; F remote_function; T0 t0; T1 t1; }; template <typename F, typename T0, typename T1, typename T2> struct pod_call_struct3 : public IS_POD_TYPE{ size_t dispatch_function; size_t objid; F remote_function; T0 t0; T1 t1; T2 t2; }; template <typename F, typename T0, typename T1, typename T2, typename T3> struct pod_call_struct4 : public IS_POD_TYPE{ size_t dispatch_function; size_t objid; F remote_function; T0 t0; T1 t1; T2 t2; T3 t3; }; template <typename F, typename T0, typename T1, typename T2, typename T3, typename T4> struct pod_call_struct5 : public IS_POD_TYPE{ size_t dispatch_function; size_t objid; F remote_function; T0 t0; T1 t1; T2 t2; T3 t3; T4 t4; }; template <typename F, typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> struct pod_call_struct6 : public IS_POD_TYPE{ size_t dispatch_function; size_t objid; F remote_function; T0 t0; T1 t1; T2 t2; T3 t3; T4 t4; T5 t5; }; template <typename F, typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> struct pod_call_struct7 : public IS_POD_TYPE{ size_t dispatch_function; size_t objid; F remote_function; T0 t0; T1 t1; T2 t2; T3 t3; T4 t4; T5 t5; T6 t6; }; template <typename F, typename T0, typename T1, typename T2, typename T3, 
typename T4, typename T5, typename T6, typename T7> struct pod_call_struct8 : public IS_POD_TYPE{ size_t dispatch_function; size_t objid; F remote_function; T0 t0; T1 t1; T2 t2; T3 t3; T4 t4; T5 t5; T6 t6; T7 t7; }; } } } #endif ================================================ FILE: src/graphlab/rpc/request_dispatch.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef REQUEST_DISPATCH_HPP #define REQUEST_DISPATCH_HPP #include <sstream> #include <iostream> #include <string> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/request_reply_handler.hpp> #include <graphlab/rpc/function_ret_type.hpp> #include <graphlab/rpc/function_arg_types_def.hpp> #include <boost/preprocessor.hpp> namespace graphlab { namespace dc_impl{ /** \internal \ingroup rpc \file request_dispatch.hpp This is an internal function and should not be used directly. Given function F, as well as input types T1 ... Tn it will construct an input archive and deserialize the types T1.... Tn, and call the function f with it. The return value of the function is then returned to the caller through the reply call to the source's request_reply_handler. 
This code dispatches to the "intrusive" form of a function call (that is the function call must take a distributed_control and a "procid_t source" as its first 2 arguments. For instance, the 1 argument of this will be DISPATCH1: \code template < typename DcType, typename F, typename T0 > void REQUESTDISPATCH1 (DcType & dc, procid_t source, unsigned char packet_type_mask, const char *buf, size_t len) { iarchive iarc (buf, len); size_t s; iarc >> s; F f = reinterpret_cast < F > (s); size_t id; iarc >> id; T0 (f0); iarc >> (f0); typename function_ret_type < typename boost::remove_const < typename boost::remove_reference < typename boost::function < typename boost::remove_pointer < F >::type >::result_type >::type >::type >::type ret = function_ret_type < typename boost::remove_const < typename boost::remove_reference < typename boost::function < typename boost::remove_pointer <F>::type >::result_type >::type >::type >::fcall3 (f, dc, source, (f0)); charstring_free (f0); boost::iostreams::stream < resizing_array_sink > retstrm (128); oarchive oarc (retstrm); oarc << ret; retstrm.flush (); if (packet_type_mask & CONTROL_PACKET) { dc.control_call (source, request_reply_handler, id, blob (retstrm->str, retstrm->len)); } else { dc.reply_remote_call (source, request_reply_handler, id, blob (retstrm->str, retstrm->len)); } free (retstrm->str); } \endcode charstring_free is a special template function which calls free(f1) only if f1 is a character array (char*) Note that the template around DcType is *deliberate*. This prevents this function from instantiating the distributed_control until as late as possible, avoiding problems with circular references. 
*/ #define GENFN(N) BOOST_PP_CAT(__GLRPC_F, N) #define GENFN2(N) BOOST_PP_CAT(f, N) #define GENARGS(Z,N,_) (BOOST_PP_CAT(f, N)) #define GENPARAMS(Z,N,_) BOOST_PP_CAT(T, N) (BOOST_PP_CAT(f, N)) ; iarc >> (BOOST_PP_CAT(f, N)) ; #define CHARSTRINGFREE(Z,N,_) charstring_free(BOOST_PP_CAT(f, N)); #define DISPATCH_GENERATOR(Z,N,_) \ template<typename DcType, typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \ void BOOST_PP_CAT(REQUESTDISPATCH,N) (DcType& dc, procid_t source, unsigned char packet_type_mask, \ const char* buf, size_t len) { \ iarchive iarc(buf, len); \ size_t s; iarc >> s; F f = reinterpret_cast<F>(s); \ size_t id; iarc >> id; \ BOOST_PP_REPEAT(N, GENPARAMS, _) \ typename function_ret_type<__GLRPC_FRESULT>::type ret = function_ret_type<__GLRPC_FRESULT>::BOOST_PP_CAT(fcall, BOOST_PP_ADD(N, 2)) \ (f, dc, source BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_)); \ BOOST_PP_REPEAT(N, CHARSTRINGFREE, _) \ boost::iostreams::stream<resizing_array_sink> retstrm(128); \ oarchive oarc(retstrm); \ oarc << ret; \ retstrm.flush(); \ if (packet_type_mask & CONTROL_PACKET) { \ dc.control_call(source, request_reply_handler, id, blob(retstrm->str, retstrm->len));\ } \ else { \ dc.reply_remote_call(source, request_reply_handler, id, blob(retstrm->str, retstrm->len));\ } \ free(retstrm->str); \ } BOOST_PP_REPEAT(7, DISPATCH_GENERATOR, _) #undef GENFN #undef GENFN2 #undef GENARGS #undef GENPARAMS #undef DISPATCH_GENERATOR /** Same as above, but is the non-intrusive version. 
*/ #define GENFN(N) BOOST_PP_CAT(NIF, N) #define GENFN2(N) BOOST_PP_CAT(f, N) #define GENNIARGS(Z,N,_) (BOOST_PP_CAT(f, N)) #define GENPARAMS(Z,N,_) BOOST_PP_CAT(T, N) (BOOST_PP_CAT(f, N)) ; iarc >> (BOOST_PP_CAT(f, N)) ; #define CHARSTRINGFREE(Z,N,_) charstring_free(BOOST_PP_CAT(f, N)); #define NONINTRUSIVE_DISPATCH_GENERATOR(Z,N,_) \ template<typename DcType, typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \ void BOOST_PP_CAT(NONINTRUSIVE_REQUESTDISPATCH,N) (DcType& dc, procid_t source, unsigned char packet_type_mask, \ const char* buf, size_t len) { \ iarchive iarc(buf, len); \ size_t s; iarc >> s; F f = reinterpret_cast<F>(s); \ size_t id; iarc >> id; \ BOOST_PP_REPEAT(N, GENPARAMS, _) \ typename function_ret_type<__GLRPC_FRESULT>::type ret = function_ret_type<__GLRPC_FRESULT>::BOOST_PP_CAT(fcall, N) \ (f BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENNIARGS ,_)); \ BOOST_PP_REPEAT(N, CHARSTRINGFREE, _) \ boost::iostreams::stream<resizing_array_sink> retstrm(128); \ oarchive oarc(retstrm); \ oarc << ret; \ retstrm.flush(); \ if (packet_type_mask & CONTROL_PACKET) { \ dc.control_call(source, request_reply_handler, id, blob(retstrm->str, retstrm->len));\ } \ else if(packet_type_mask & FLUSH_PACKET) { \ dc.reply_remote_call(source, request_reply_handler, id, blob(retstrm->str, retstrm->len));\ } \ else { \ dc.remote_call(source, request_reply_handler, id, blob(retstrm->str, retstrm->len));\ } \ free(retstrm->str); \ } BOOST_PP_REPEAT(7, NONINTRUSIVE_DISPATCH_GENERATOR, _) #undef GENFN #undef GENFN2 #undef GENNIARGS #undef GENPARAMS #undef NONINTRUSIVE_DISPATCH_GENERATOR } // namespace dc_impl } // namespace graphlab #include <graphlab/rpc/function_arg_types_undef.hpp> #endif ================================================ FILE: src/graphlab/rpc/request_future.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef OBJECT_REQUEST_FUTURE_HPP #define OBJECT_REQUEST_FUTURE_HPP #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/request_reply_handler.hpp> #include <graphlab/rpc/function_ret_type.hpp> namespace graphlab { /** * \ingroup rpc * The result of a remote_request future call. * This class represents the outcome of a remote request sent to another * machine via the future-based remote_request_call. The future remote_request call * returns immediately with this object. Only when operator() is called on this * object, then it waits for a result from the remote machine. All remote_request * calls which return futures are linked below. * * example: * \code * // this function returns immediately * graphlab::request_future<int> res = * rmi.future_remote_request(SOME_OTHER_MACHINE, * function_which_returns_an_integer, ...); * * ... we can do other stuff ... * // read the result, or wait for the result if it is not done yet. 
* int actual_result = res(); * \endcode * * \see graphlab::distributed_control::future_remote_request * graphlab::dc_dist_object::future_remote_request * graphlab::fiber_remote_request * graphlab::object_fiber_remote_request * * The future object holds a copy of the result of the request, and the * operator() call returns a reference to this result (once it is available). */ template <typename T> struct request_future { typedef typename dc_impl::function_ret_type<T>::type result_type; mutable std::auto_ptr<dc_impl::ireply_container> reply; result_type result; bool hasval; /// default constructor request_future(): reply(new dc_impl::basic_reply_container), hasval(false) { } /** constructor which allows you to specify a custom target container * This class takes ownership of the container and will free it when done. */ request_future(dc_impl::ireply_container* container): reply(container), hasval(false) { } /** We can assign return values directly to the future in the * case where no remote calls are necessary. * Thus allowing the following to be written easily: * \code * request_future<int> a_function(int arg) { * if (arg == 0) return rmi.future_remote_request(... somewhere else ...) ; * else return 10; * } * \endcode */ request_future(const T& val): reply(NULL), result(val), hasval(true) { } /// copy constructor request_future(const request_future<T>& val): reply(val.reply), result(val.result), hasval(val.hasval) { } /// operator= request_future& operator=(const request_future<T>& val) { reply = val.reply; result = val.result; hasval = val.hasval; return *this; } /** * \internal * Returns a handle to the underlying container */ size_t get_handle() { return reinterpret_cast<size_t>(reply.get()); } /** * Waits for the request if it has not yet been received. 
*/ void wait() { if (!hasval) { reply->wait(); dc_impl::blob& receiveddata = reply->get_blob(); iarchive iarc(receiveddata.c, receiveddata.len); iarc >> result; receiveddata.free(); hasval = true; } } /** * Returns true if the result is ready and \ref operator() * can be called without blocking. */ bool is_ready() { return (hasval || reply->ready()); } /** * Waits for the request if it has not yet been received. * When the result is ready, it returns a reference to the received value. */ result_type& operator()() { if (!hasval) wait(); return result; } }; template <> struct request_future<void> { typedef dc_impl::function_ret_type<void>::type result_type; mutable std::auto_ptr<dc_impl::ireply_container> reply; bool hasval; request_future(): reply(new dc_impl::basic_reply_container), hasval(false) { } request_future(dc_impl::ireply_container* container): reply(container), hasval(false) { } request_future(int val): reply(NULL), hasval(true) { } request_future(const request_future<void>& val): reply(val.reply), hasval(val.hasval) { } request_future& operator=(const request_future<void>& val) { reply = val.reply; hasval = val.hasval; return *this; } bool is_ready() { return (hasval || reply->ready()); } size_t get_handle() { return reinterpret_cast<size_t>(reply.get()); } void wait() { if (!hasval) { result_type result; reply->wait(); dc_impl::blob& receiveddata = reply->get_blob(); iarchive iarc(receiveddata.c, receiveddata.len); iarc >> result; receiveddata.free(); hasval = true; } } result_type operator()() { if (!hasval) wait(); return 0; } }; } #endif ================================================ FILE: src/graphlab/rpc/request_issue.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef REQUEST_ISSUE_HPP #define REQUEST_ISSUE_HPP #include <sstream> #include <iostream> #include <string> #include <graphlab/serialization/serialization_includes.hpp> #include <graphlab/rpc/dc_types.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/rpc/request_reply_handler.hpp> #include <graphlab/rpc/request_future.hpp> #include <graphlab/rpc/request_dispatch.hpp> #include <graphlab/rpc/function_ret_type.hpp> #include <graphlab/rpc/function_arg_types_def.hpp> #include <graphlab/rpc/dc_thread_get_send_buffer.hpp> #include <graphlab/rpc/dc_compile_parameters.hpp> #include <boost/preprocessor.hpp> namespace graphlab { namespace dc_impl { /** \internal \ingroup rpc \file request_issue.hpp This is an internal function and should not be used directly. This is an internal function and should not be used directly. A request is an RPC which is performed "synchronously". The return value of the function is returned. The format of the RPC request is in the form of an archive and is as follows The format of a "request" packet is in the form of an archive and is as follows \li (dispatch_type*) -- pointer to target machine's dispatcher function \li (void*) -- pointer to target function \li size_t -- return ID \li fn::arg1_type -- target function's 1st argument \li fn::arg2_type -- target function's 2nd argument \li ... \li fn::argN_type -- target function's Nth argument The ID here is a pointer to a ireply_container datastructure. 
When the remote machine completes the function call, it will issue an RPC to the function reply_increment_counter on the originating machine. The reply_increment_counter function store the serialized return value in the ireply_container , as well as perform an atomic increment on the ireply_container . Here is an example of the marshall code for 1 argument \code namespace request_issue_detail { template < typename BoolType, typename F, typename T0 > struct dispatch_selector1 { static dispatch_type dispatchfn () { return dc_impl::NONINTRUSIVE_REQUESTDISPATCH1 < distributed_control, F, T0 >; } }; template < typename F, typename T0 > struct dispatch_selector1 <boost::mpl::bool_ < true >, F, T0 > { static dispatch_type dispatchfn () { return dc_impl::REQUESTDISPATCH1 < distributed_control, F, T0 >; } }; } template < typename F, typename T0 > class remote_request_issue1 { public: static void exec (dc_send * sender, size_t request_handle, unsigned char flags, procid_t target, F remote_function, const T0 & i0) { oarchive *ptr = get_thread_local_buffer (target); oarchive & arc = *ptr; size_t len = dc_send::write_packet_header (arc, _get_procid (), flags, _get_sequentialization_key ()); uint32_t beginoff = arc.off; dispatch_type d = request_issue_detail::dispatch_selector1 < typename is_rpc_call < F >::type, F, T0 >::dispatchfn (); arc << reinterpret_cast < size_t > (d); arc << reinterpret_cast < size_t > (remote_function); arc << request_handle; arc << i0; *(reinterpret_cast < uint32_t * >(arc.buf + len)) = arc.off - beginoff; release_thread_local_buffer (target, flags & CONTROL_PACKET); pull_flush_thread_local_buffer (target); } }; \endcode If the pointer to the dispatcher function is NULL, the next argument will contain the name of the function. This is a "portable" call. 
\see portable_issue.hpp */ #define GENARGS(Z,N,_) BOOST_PP_CAT(const T, N) BOOST_PP_CAT(&i, N) #define GENT(Z,N,_) BOOST_PP_CAT(T, N) #define GENARC(Z,N,_) arc << BOOST_PP_CAT(i, N); /** The dispatch_selectorN structs are used to pick between the standard dispatcher and the nonintrusive dispatch by checking if the function is a RPC style call or not. */ #define REMOTE_REQUEST_ISSUE_GENERATOR(Z,N,FNAME_AND_CALL) \ namespace request_issue_detail { \ template <typename BoolType, typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \ struct BOOST_PP_CAT(dispatch_selector, N){ \ static dispatch_type dispatchfn() { return BOOST_PP_CAT(dc_impl::NONINTRUSIVE_REQUESTDISPATCH,N)<distributed_control,F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N, GENT ,_) >; } \ };\ template <typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \ struct BOOST_PP_CAT(dispatch_selector, N)<boost::mpl::bool_<true>, F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T)>{ \ static dispatch_type dispatchfn() { return BOOST_PP_CAT(dc_impl::REQUESTDISPATCH,N)<distributed_control,F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N, GENT ,_) >; } \ }; \ }\ template<typename F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, typename T)> \ class BOOST_PP_CAT(FNAME_AND_CALL, N) { \ public: \ static void exec(dc_send* sender, size_t request_handle, unsigned char flags, procid_t target, F remote_function BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM(N,GENARGS ,_) ) { \ oarchive* ptr = get_thread_local_buffer(target); \ oarchive& arc = *ptr; \ size_t len = dc_send::write_packet_header(arc, _get_procid(), flags, _get_sequentialization_key()); \ uint32_t beginoff = arc.off; \ dispatch_type d = BOOST_PP_CAT(request_issue_detail::dispatch_selector,N)<typename is_rpc_call<F>::type, F BOOST_PP_COMMA_IF(N) BOOST_PP_ENUM_PARAMS(N, T) >::dispatchfn(); \ arc << reinterpret_cast<size_t>(d); \ arc << reinterpret_cast<size_t>(remote_function); \ arc << request_handle; \ BOOST_PP_REPEAT(N, GENARC, _) \ 
*(reinterpret_cast<uint32_t*>(arc.buf + len)) = arc.off - beginoff; \ release_thread_local_buffer(target, flags & CONTROL_PACKET); \ if (flags & FLUSH_PACKET) pull_flush_soon_thread_local_buffer(target); \ }\ }; /** Generates a function call issue. 3rd argument is the issue name */ BOOST_PP_REPEAT(7, REMOTE_REQUEST_ISSUE_GENERATOR, remote_request_issue ) #undef GENARC #undef GENT #undef GENARGS #undef REMOTE_REQUEST_ISSUE_GENERATOR } // namespace dc_impl } // namespace graphlab #include <graphlab/rpc/function_arg_types_undef.hpp> #endif ================================================ FILE: src/graphlab/rpc/request_reply_handler.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <string> #include <graphlab/rpc/dc.hpp> #include <graphlab/rpc/request_reply_handler.hpp> namespace graphlab { void request_reply_handler(distributed_control &dc, procid_t src, size_t ptr, dc_impl::blob ret) { dc_impl::ireply_container* a = reinterpret_cast<dc_impl::ireply_container*>(ptr); a->receive(src, ret); } } ================================================ FILE: src/graphlab/rpc/request_reply_handler.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef REPLY_INCREMENT_COUNTER_HPP #define REPLY_INCREMENT_COUNTER_HPP #include <string> #include <graphlab/parallel/atomic.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/rpc/dc_internal_types.hpp> namespace graphlab { class distributed_control; namespace dc_impl { /** \ingroup rpc \internal A wrapper around a char array. This structure is incapable of freeing itself and must be managed externally */ struct blob { /// Constructs a blob containing a pointer to a character array with length len blob(char* c, size_t len):c(c),len(len) { }; blob():c(NULL), len(0){ }; char *c; ///< stored pointer size_t len; ///< stored length /// serialize the char array void save(oarchive& oarc) const { oarc << len; if (len > 0) serialize(oarc, c, len); } /// deserializes a char array. If there is already a char array here, it will be freed void load(iarchive& iarc) { if (c) ::free(c); c = NULL; iarc >> len; if (len > 0) { c = (char*) malloc(len); deserialize(iarc, c, len); } } /// Free the stored char array. void free() { if (c) { ::free(c); c = NULL; len = 0; } } }; /** *\internal * \ingroup rpc * Abstract class for where the result of a request go into. 
*/ struct ireply_container { ireply_container() { } virtual ~ireply_container() { } virtual void wait() = 0; virtual void receive(procid_t source, blob b) = 0; virtual bool ready() const = 0; virtual blob& get_blob() = 0; }; /** \internal \ingroup rpc The most basic container for replies. Only waits for one reply, and uses a mutex/condition variable pair to lock and wait on the reply value. \see ireply_container */ struct basic_reply_container: public ireply_container{ blob val; mutex mut; conditional cond; bool valready; /** * Constructs a reply object which waits for 'retcount' replies. */ basic_reply_container():valready(false) { } ~basic_reply_container() { val.free(); } void receive(procid_t source, blob b) { mut.lock(); val = b; valready = true; cond.signal(); mut.unlock(); } /** * Waits for all replies to complete. It is up to the * reply implementation to decrement the counter. */ inline void wait() { mut.lock(); while(!valready) cond.wait(mut); mut.unlock(); } inline bool ready() const { return valready; } blob& get_blob() { return val; } }; } // namespace dc_impl /** * \internal * \ingroup rpc * The RPC call to handle the result of a request. * * The basic protocol of a request is as such: * On the sender side, a request_future is created which contains within it * an instance of an ireply_container. A message is then sent to the target * machine containing the address of the ireply_container. * Once the target machine finishes evaluating the function, it issues a * call to the request_reply_handler function, passing the original address * into the ptr argument. The request_reply_handler then reinterprets the ptr * argument as an ireply_container object and calls the receive() function * on it. 
* \see ireply_container */ void request_reply_handler(distributed_control &dc, procid_t src, size_t ptr, dc_impl::blob ret); } // namespace graphlab #endif ================================================ FILE: src/graphlab/rpc/rpc.dox ================================================ /** \page RPC GraphLab RPC GraphLab RPC primary design goal was to provide a convenient and easy to use asynchronous communication system between \b identical binaries running on different machines over a distributed network. It therefore provides MPI-like capabilities together with RPC functionality. The GraphLab distributed implementation is built on top of this RPC library. GraphLab RPC uses extensive template meta-programming techniques to provide an \b IDL-free (http://en.wikipedia.org/wiki/Interface_description_language) RPC system, allowing arbitrary functions to be called on program running on remote machines (Note that all machines must be running the same binary). For instance, this is a particularly interesting example: \code #include <iostream> #include <graphlab/rpc/dc.hpp> #include <graphlab/rpc/dc_init_from_mpi.hpp> using namespace graphlab; int main(int argc, char ** argv) { mpi_tools::init(argc, argv); distributed_control dc; if (dc.procid() == 0 && dc.numprocs() >= 2) { dc.remote_call(1, printf, "%d + %f = %s\n", 1, 2.0, "three"); } dc.barrier(); } \endcode The distributed_control constructor will first detect if MPI is initialized, and if it is, will use MPI to perform initialization (\ref sec_spawning_mpi). If MPI is not initialized, then the constructor will check if an alternate spawning process using environment variables is used (\ref sec_spawning_rpcexec). The environment variable based spawning process is less reliable, but useful in situations where MPI is not available. 
Once the distributed_control object is created,
\ref graphlab::distributed_control::procid "dc.procid()" provides the current
machine number, while
\ref graphlab::distributed_control::numprocs "dc.numprocs()" provides the
total number of machines.

The if-condition is therefore entered by only the first machine, which
performs a remote call to the second machine (the first argument of
remote_call is the target machine ID). The second machine will then
execute the equivalent of

\code
printf("%d + %f = %s\n", 1, 2.0, "three");
\endcode

We will discuss the different aspects of the RPC library separately:

\li \ref Spawning \n Initialization and Starting a distributed program using GraphLab RPC
\li \ref Basic_RPC \n Basic usage of the RPC library. Calling of simple functions.
\li \ref OOP_RPC \n Advanced usage of the RPC library. Creating and managing distributed object contexts.
\li \ref Fiber_RPC \n Fiber-compatible remote request calls.

\section sec_examples Examples

The tests/ directory includes a collection of nine RPC examples
demonstrating all the key features.

\li RPC Example 1: Basic Synchronous RPC \ref rpc_example1.cpp
\li RPC Example 2: Asynchronous RPC with Built-in Serialization \ref rpc_example2.cpp
\li RPC Example 3: Asynchronous RPC with Struct POD Serialization \ref rpc_example3.cpp
\li RPC Example 4: Asynchronous RPC with Manual Serialization \ref rpc_example4.cpp
\li RPC Example 5: Asynchronous RPC to printf \ref rpc_example5.cpp
\li RPC Example 6: Asynchronous RPC with graphlab::any \ref rpc_example6.cpp
\li RPC Example 7: Distributed Object \ref rpc_example7.cpp
\li RPC Example 8: RPC using iterators over machines \ref rpc_example8.cpp
\li RPC Example 9: Distributed Object RPC using iterators over machines \ref rpc_example9.cpp

\section sec_spawning Spawning and Initialization

Spawning is the process of starting an instance of GraphLab RPC on
separate machines.
GraphLab RPC supports two spawning methods: MPI or rpcexec.py (a script in
the scripts/ directory). The MPI method is <b>strongly recommended</b> and
is the most reliable.

\subsection sec_spawning_mpi Spawning with MPI

GraphLab was tested with MPICH2, but should also work with OpenMPI.
Refer to the documentation for MPICH2 or OpenMPI to set up MPI and make
sure that you can run the basic test MPI programs (MPICH2 comes with an
mpdringtest).

No additional configuration is necessary to spawn a GraphLab RPC program
with MPI. The GraphLab RPC program should begin with:

\code
#include <graphlab/rpc/dc.hpp>
using namespace graphlab;

int main(int argc, char ** argv) {
  mpi_tools::init(argc, argv);
  distributed_control dc;
  ...
}
\endcode

In this case, distributed_control detects that MPI was initialized prior
and will use MPI to perform initial negotiation of port numbers.

\subsection sec_spawning_rpcexec Spawning with rpcexec.py

rpcexec.py provides an alternative, less reliable way to run a process on
a collection of machines, using ssh to communicate between them.
<tt>rpcexec.py --help</tt> provides some basic help.

You will first need to create a host file which is simply a list of host
names and IP addresses:

\verbatim
localhost
192.168.1.5
node2
node3
localhost
192.168.1.5
node2
node3
\endverbatim

Running <tt>rpcexec.py -n [num to start] -f [hostsfile] `command`</tt>
will execute the command on the first N hosts listed in the hostfile.
For instance in this case, running

\verbatim
rpcexec.py -n 5 -f hostsfile ls
\endverbatim

will run the <tt>ls</tt> bash command twice on the localhost, and once on
each of the three nodes: 192.168.1.5, node2, node3.

rpcexec.py also supports a 'screen' (GNU Screen) mode. Running

\verbatim
rpcexec.py -s lsscreen -n 3 -f hostsfile ls
\endverbatim

will create a `screen` session with 3 windows where one window ran `ls`
on the localhost, while two other windows sshed into 192.168.1.5 and
<tt>node2</tt>, running the `ls` on each of them.
The screen session will be named "lsscreen" rpcexec.py will terminate immediately after creating the screen session. \verbatim screen -r lsscreen \endverbatim will display and resume the screen session. If rpcexec.py is used to spawn the program, The GraphLab RPC program should begin with: \code #include <graphlab/rpc/dc.hpp> using namespace graphlab; int main(int argc, char ** argv) { distributed_control dc; ... } \endcode Since unlike MPI spawning, there is no existing channel for communicating port information between the machines. rpcexec.py therefore uses environment variables to pass information to the GraphLab RPC process. The following two environment variables are used: \li \b SPAWNNODES A comma seperated list of hostnames participating in the distributed program \li \b SPAWNID: The index of the current machine into the SPAWNNODES list. First machine has an index value of 0. A machine will listen on the port 10000 + SPAWNID. See \ref graphlab::dc_init_param "dc_init_param" for details about additional configuration options. This spawning system is less flexibile due to the fixed port numbering. For instance, a crashed process will keep the port in TIMED_WAIT for a few minutes, preventing the next RPC process from running. This also prevents multiple different GraphLab RPC programs from running on the same set of the machines. The MPI spawner is therefore the recommended method for starting the RPC system. \section sec_rpc_usage RPC Usage Overview The graphlab::distributed_control object provides asynchronous, multi-threaded Remote Procedure Call (RPC) services to allow distributed GraphLab processes to communicate with each other. Currently, the only communication method implemented is TCP/IP. Each process is assigned a sequential process ID at starting at 0. i.e. The first process will have a process ID of 0, the second process will have an ID of 1, etc. 
graphlab::distributed_control::procid() can be used to obtain the current machine's process ID, and graphlab::distributed_control::numprocs() can be used to obtain the total number of processes. The primary functions used to communicate between processes are graphlab::distributed_control::remote_call() and graphlab::distributed_control::remote_request(). These functions are thread-safe and can be called very rapidly as they only write into a local buffer. Communication is handled by a background thread. On the remote side, RPC calls are handled in parallel by a thread pool, and thus may be parallelized arbitrarily. Operations such as graphlab::distributed_control::full_barrier(), or the sequentialization key can be used to get finer grained control over order of execution on the remote machine. A few other additional helper functions are also provided to support "synchronous" modes of communication. These functions are not thread-safe and can only be called on one thread per machine. These functions block until all machines call the same function. For instance, if gather() is called on one machine, it will not return until all machines call gather(). \li graphlab::distributed_control::barrier() \li graphlab::distributed_control::full_barrier() \li graphlab::distributed_control::broadcast() \li graphlab::distributed_control::all_reduce() \li graphlab::distributed_control::all_reduce2() \li graphlab::distributed_control::gather() \li graphlab::distributed_control::all_gather() \subsection sec_basic_rpc_usage Basic RPC Once the distributed_control is set up, it can be used to call functions on remote machines. For instance in the earlier example: \code if (dc.procid() == 0) { dc.remote_call(1, printf, "%d + %f = %s\n", 1, 2.0, "three"); } \endcode calls printf from machine 0 to machine 1 asynchronously. In the GraphLab RPC terminology, a \b call is a one-way remote function call, while a \b request is a function call which has a return value. 
\b calls are executed asynchronously and returns immediately, while \b requests will wait for completion of the function on the remote machine. For instance in the code below, machine 1 could print either "hello world", or "world hello". \code if (dc.procid() == 0) { dc.remote_call(1, printf, "hello "); dc.remote_call(1, printf, "world "); } \endcode Remote calls complete \b immediately, regardless of how long the function took on the other side. For instance, processor 0 will take almost no time running through this code. \code if (dc.procid() == 0) { dc.remote_call(1, sleep, 1); } \endcode However, since requests will wait for completion and send back the reply, this could take about a second to run. \code if (dc.procid() == 0) { dc.remote_request(1, sleep, 1); } \endcode All arguments and return values will be passed by value. Any argument type or return type can be used as long as it is \ref Serialization "serializable". \subsection sec_rpc_collective Collective Operations In addition to regular RPC operations, A collection of MPI-like collective operations are also provided. A collective operation is a function which requires all machines to call the same function before execution can proceed. \subsubsection sec_rpc_collective_barrier Barrier One of the most useful operations is graphlab::distributed_control::barrier() The barrier() is functionally equivalent to MPI_Barrier(). It requires all machines to hit the barrier, before execution is allowed to resume. For instance in the code below, while processor 0 is busy working at compute Pi, all other machines will pause at the barrier and wait for the processor 0 to complete computation and hit the barrier, before execution can proceed. 
\code if (dc.procid() == 0) { compute Pi to 1 million digits } dc.barrier(); \endcode \subsubsection sec_rpc_collective_fullbarrier Full Barrier A \ref graphlab::distributed_control::full_barrier() "Full Barrier" is also provided through graphlab::distributed_control::full_barrier(). A Full Barrier is like a barrier but guarantees that all RPC operations sent before the barrier must complete execution. For instance in the example below, The full barrier guarantees that the call to set_a_to_1() must complete on all remote machines before execution is allowed to proceed. All machines will therefore print '1'. \code int a = 0; void set_a_to_1() { a = 1; } int main(int argc, char** argv) { graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.remote_call( [ another machine ], set_a_to_1); dc.full_barrier(); std::cout << a; } \endcode The full_barrier is about 2-3x more costly than the regular barrier and should be used sparingly. \subsubsection sec_rpc_collective_other_collectives Other Collectives In addition to the barrier and the full barrier, operations such as broadcast, gather, all_gather are also provided. Note that the implementation of these operations are not particularly efficient as compared to native MPI implementations due to simplistic algorithm choices. \subsection sec_rpc_sequentialization Sequentialization A slightly more unusual feature of the GraphLab RPC system is the ability to enforce sequentialization of a sequence of RPC calls. This is particularly useful for asynchronous usages of this RPC library and can simplify code in many cases. 
For instance, in the code below: \code int a = 0; void set_a_to_1() { a = 1; } void print_a() { std::cout << a; } int main(int argc, char** argv) { graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; targetmachine = (dc.procid() + 1) % dc.numprocs(); dc.remote_call(targetmachine, set_a_to_1); dc.remote_call(targetmachine, print_a); } \endcode Note that due to the asynchronous nature of the remote_call, it is possible for <tt>print_a()</tt> to complete on the target machine, before the variable <tt>a</tt> is set to 1. Therefore, it is possible for the output to be '0'. A possible solution as suggested before is to change the remote_calls to remote_requests. However, requests incur a large performance penalty due to the need to wait for replies. Alternatively, we can use the sequentialization key system: \code // set the sequentialization key to a non-zero value char oldkey = graphlab::distributed_control::set_sequentialization_key(123); dc.remote_call(targetmachine, set_a_to_1); dc.remote_call(targetmachine, print_a); graphlab::distributed_control::set_sequentialization_key(oldkey); \endcode Essentially all RPC calls made using the same key value (as long as the key value is non-zero) will sequentialize. This enforces that calls/requests made while a key is set will always be processed by the same thread in the thread pool on the target machine, ensuring sequentialization of the <tt>set_a_to_1</tt> and the <tt>print_a</tt> call. The sequentialization key is unique to each \b thread (thread-local) so sequentialization of RPC calls in one thread will not affect RPC calls made by other threads. \section OOP_RPC Distributed Objects GraphLab provides a "distributed object" system which simplifies the process of designing data structures which provide distributed computation and storage. A GraphLab distributed object is an object which is instantiated at the same time across all machines. 
The object internally contains a <tt>dc_dist_object</tt> which provides RPC communication between distributed instances. For instance, say we run the following code using two machines: \code int main(int argc, char** argv) { graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; graphlab::dht<std::string, std::string> str_map(dc); dc.barrier(); if (dc.procid() == 0) { str_map.set("hello", "world"); } else if (dc.procid() == 1) { str_map.set("something", "other"); } dc.barrier(); std::cout << str_map.get("hello").second; std::cout << str_map.get("something").second; } \endcode The DHT is a distributed object which provides a distributed key/value store (a distributed "Hash Table"). Every entry is stored at a machine corresponding to a hash of the key value. Note that it is created at the same time on all the machines. The barrier() after creation ensures that the object is instantiated properly on all machines before utilization. Now, after initialization, the <tt>set</tt> function of the dht will internally hash the key value and forward it to the right machine for processing. <tt>get</tt> is similar. However, since the distributed object system operates on \b instances, it is possible to create multiple distributed objects easily. For instance, the following code will create 50 different distributed key/value maps. str_map[15] corresponds to the same DHT when accessed on any machine. \code graphlab::dht<std::string, std::string>* str_map[50]; for (size_t i = 0;i < 50; ++i) { str_map[i] = new graphlab::dht<std::string, std::string>(dc); } \endcode \subsection sec_oop_rpc_usage Usage We will demonstrate usage of the distributed object system using a simple distributed Hash Table example. Note that this is a \b very \b simple implementation, and is not entirely correct since we are going to ignore thread-safety. But it is sufficient to demonstrate the key concepts. 
\code class string_dht { private: std::map<int, std::string> local_storage; mutable dc_dist_object<string_dht> rmi; \endcode First, each machine needs a local data storage. In this case we will simply use a std::map. The key object that provides distributed access is the <tt>dc_dist_object\<string_dht\> rmi;</tt>. This object creates a "context" for remote function calls, allowing the correct remote instance to be identified. We will now look at the string_dht constructor. The rmi object constructor requires a reference to the underlying distributed_control object, as well as a pointer to the current instance: \code public: string_dht(distributed_control &dc): rmi(dc, this) { } \endcode Now, to demonstrate how the RMI object is used, lets see the set() function \code void set(int key, const std::string &newval) { procid_t owningmachine = key % rmi.numprocs(); if (owningmachine == rmi.procid()) { local_storage[key] = newval; } \endcode We use a simple hash function to identify where the key-value pair should be stored. Observe that the RMI object provides pretty much the same functionality as the graphlab::distributed_control object, having both graphlab::dc_dist_object::numprocs() and graphlab::dc_dist_object::procid(). If the data is to be stored in the current machine, we simply store it. Otherwise we will need to send it to a remote machine for storage. This is the interesting case: \code else { rmi.remote_call(owningmachine, &string_dht::set, key, newval); } } \endcode The RMI object supports the same family of call/request operations as \ref sec_rpc_dc "distributed_control" However, it will only work with <b>member function pointers</b>. For instance in this case, we will be calling the set() member function on the matching instance of the string_dht object on a remote machine. (Note that the & is important and necessary) The get() function is similar. However, we will have to use remote requests. 
\code std::string get(int key) { procid_t owningmachine = key % rmi.numprocs(); if (owningmachine == rmi.procid()) { return local_storage[key]; } else { return rmi.remote_request(owningmachine, &string_dht::get, key); } } \endcode As stated earlier, this code should not be used as-is due to several limitations, such as the fact that the local_storage object is not thread-safe. Since incoming RPC calls are generally multithreaded, locks are necessary. See dht.hpp for an equivalent "safe" example of a simple DHT. \subsection sec_oop_rpc_context Context Essentially, the dc_dist_object object supports the identical set of operations as the distributed_control object, but restricted to the \b context of a single object instance. It includes all the regular call operations: \li graphlab::dc_dist_object::remote_call() \li graphlab::dc_dist_object::remote_request() Additionally, this \b context is entirely independent of the distributed_control object, permitting its own set of collective operations such as graphlab::dc_dist_object::broadcast, graphlab::dc_dist_object::barrier, graphlab::dc_dist_object::full_barrier, etc. Since these collective operations also operate entirely within the context of the object instance, this permits the use of parallel collectives. For instance, I could have two objects, and each object internally spawns threads to perform distributed computation; using the RMI object to perform collective operations which are local to the object. In particular, the graphlab::dc_dist_object::full_barrier() is worth taking note of. The graphlab::distributed_control::full_barrier() ensures completion of ALL RPC calls including calls meant for distributed objects. Its barrier is therefore \b global to the state of the program as a whole. The graphlab::dc_dist_object::full_barrier() however, only ensures completion of all RPC calls within the object instance. Its barrier is therefore \b local to the state of the distributed object. 
This allows each distributed object to run its own full barriers without affecting other distributed objects. \subsection sec_oop_rpc_notes Final Notes Finally, note that the RMI object can ONLY call member function pointers. It cannot call other global functions (such as printf). The global context can be accessed through graphlab::dc_dist_object::dc() which returns the underlying distributed_control object, which can then be used to call global functions. For instance: \code rmi.dc().remote_call(1, printf, "hello "); \endcode \section Fiber_RPC Fiber Compatible Remote Requests To support the fiber architecture required for the Warp Engine, we provide the following functions: \li graphlab::fiber_remote_request() \li graphlab::object_fiber_remote_request() These two functions are special in that unlike the remote_request functions, they return immediately with a future object. For instance: \code int add_one(int a) { return a + 1; } ... /* elsewhere */ graphlab::request_future<int> future = fiber_remote_request(1, /* call to machine 1 */ add_one, 1); \endcode Waiting on the future, using either: \code int ret = future(); /* Or, more explicitly ... */ future.wait(); int ret = future(); \endcode Will block until the result is available. This wait, however, is optimized if the caller is in a fiber, in which case the fiber is descheduled, allowing other fibers to execute while waiting for the result. The graphlab::object_fiber_remote_request() function is similar, but allows for calling of member functions of a class. */ ================================================ FILE: src/graphlab/rpc/rpc_includes.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_RPC_INCLUDES #define GRAPHLAB_RPC_INCLUDES #include <graphlab/rpc/dc.hpp> #include <graphlab/rpc/dc_init_from_mpi.hpp> #endif ================================================ FILE: src/graphlab/rpc/sample_sort.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_RPC_SAMPLE_SORT_HPP #define GRAPHLAB_RPC_SAMPLE_SORT_HPP #include <vector> #include <algorithm> #include <utility> #include <graphlab/rpc/dc_dist_object.hpp> #include <graphlab/rpc/buffered_exchange.hpp> #include <graphlab/logger/assertions.hpp> namespace graphlab { namespace sample_sort_impl { template <typename Key, typename Value> struct pair_key_comparator { bool operator()(const std::pair<Key,Value>& k1, const std::pair<Key,Value>& k2) { return k1.first < k2.first; } }; } template <typename Key, typename Value> class sample_sort { private: dc_dist_object<sample_sort<Key, Value> > rmi; typedef buffered_exchange<std::pair<Key, Value> > key_exchange_type; key_exchange_type key_exchange; std::vector<std::pair<Key, Value> > key_values; public: sample_sort(distributed_control& dc): rmi(dc, this), key_exchange(dc) { } template <typename KeyIterator, typename ValueIterator> void sort(KeyIterator kstart, KeyIterator kend, ValueIterator vstart, ValueIterator vend) { rmi.barrier(); size_t num_entries = std::distance(kstart, kend); ASSERT_EQ(num_entries, std::distance(vstart, vend)); // we will sample k * p entries std::vector<std::vector<Key> > sampled_keys(rmi.numprocs()); for (size_t i = 0;i < 100 * rmi.numprocs(); ++i) { size_t idx = (rand() % num_entries); sampled_keys[rmi.procid()].push_back(*(kstart + idx)); } rmi.all_gather(sampled_keys); // collapse into a single array and sort std::vector<Key> all_sampled_keys; for (size_t i = 0;i < sampled_keys.size(); ++i) { std::copy(sampled_keys[i].begin(), sampled_keys[i].end(), std::inserter(all_sampled_keys, all_sampled_keys.end())); } // sort the sampled keys and extract the ranges std::sort(all_sampled_keys.begin(), all_sampled_keys.end()); std::vector<Key> ranges(rmi.numprocs()); ranges[0] = Key(); for(size_t i = 1; i < rmi.numprocs(); ++i) { ranges[i] = all_sampled_keys[sampled_keys[0].size() * i]; } // begin shuffle 
KeyIterator kiter = kstart; ValueIterator viter = vstart; if (rmi.numprocs() < 8) { while(kiter != kend) { procid_t target_machine = 0; while (target_machine < rmi.numprocs() - 1 && ranges[target_machine + 1] < *kiter) ++target_machine; key_exchange.send(target_machine, std::make_pair(*kiter, *viter)); ++kiter; ++viter; } } else { while(kiter != kend) { procid_t target_machine = std::upper_bound(ranges.begin(), ranges.end(), *kiter) - ranges.begin() - 1; key_exchange.send(target_machine, std::make_pair(*kiter, *viter)); ++kiter; ++viter; } } key_exchange.flush(); // read from key exchange procid_t recvid; typename key_exchange_type::buffer_type buffer; while(key_exchange.recv(recvid, buffer)) { std::copy(buffer.begin(), buffer.end(), std::inserter(key_values, key_values.end())); } std::sort(key_values.begin(), key_values.end(), sample_sort_impl::pair_key_comparator<Key,Value>()); rmi.barrier(); } std::vector<std::pair<Key, Value> >& result() { return key_values; } }; } // namespace graphlab #endif ================================================ FILE: src/graphlab/rpc/thread_local_send_buffer.cpp ================================================ #include <graphlab/rpc/thread_local_send_buffer.hpp> #include <graphlab/rpc/dc.hpp> namespace graphlab { namespace dc_impl { thread_local_buffer::thread_local_buffer() { // allocate the buffers dc = distributed_control::get_instance(); size_t nprocs = dc->numprocs(); outbuf.resize(nprocs); for (size_t i = 0;i < outbuf.size(); ++i) { outbuf[i] = new inplace_lf_queue2<buffer_elem>; } current_archive.resize(nprocs); archive_locks.resize(nprocs); bytes_sent.resize(nprocs, 0); dc->register_send_buffer(this); procid = dc->procid(); } thread_local_buffer::~thread_local_buffer() { dc->unregister_send_buffer(this); push_flush(); // deallocate the buffers for (size_t i = 0; i < current_archive.size(); ++i) { if (current_archive[i].buf) { free(current_archive[i].buf); current_archive[i].buf = NULL; } } for (size_t i = 0;i < 
outbuf.size(); ++i) { delete outbuf[i]; } outbuf.clear(); } void thread_local_buffer::inc_calls_sent(procid_t target) { dc->inc_calls_sent(target); } void thread_local_buffer::push_flush() { for (size_t i = 0; i < outbuf.size(); ++i) { std::pair<buffer_elem*, buffer_elem*> bufs = extract(i); if (bufs.first != NULL) { while(bufs.first != bufs.second) { buffer_elem* prev = bufs.first; dc->write_to_buffer(i, bufs.first->buf, bufs.second->len); buffer_elem** next = &bufs.first->next; volatile buffer_elem** n = (volatile buffer_elem**)(next); while(__unlikely__((*n) == NULL)) { asm volatile("pause\n": : :"memory"); } bufs.first = (buffer_elem*)(*n); delete prev; } dc->flush_soon(i); } } } void thread_local_buffer::pull_flush() { dc->flush(); } void thread_local_buffer::pull_flush(procid_t p) { dc->flush(p); } void thread_local_buffer::pull_flush_soon() { dc->flush_soon(); } void thread_local_buffer::pull_flush_soon(procid_t p) { dc->flush_soon(p); } oarchive* thread_local_buffer::acquire(procid_t target) { archive_locks[target].lock(); // need a new archive, or existing one at risk of being resized if (current_archive[target].buf == NULL) { current_archive[target].buf = (char*)malloc(INITIAL_BUFFER_SIZE); current_archive[target].off = 0; current_archive[target].len = INITIAL_BUFFER_SIZE; } prev_acquire_archive_size = current_archive[target].off; return ¤t_archive[target]; } void thread_local_buffer::add_to_queue(procid_t target, char* ptr, size_t len) { buffer_elem* elem = new buffer_elem; ASSERT_NE(ptr, NULL); elem->buf = ptr; elem->len = len; elem->next = NULL; outbuf[target]->enqueue(elem); if (outbuf[target]->approx_size() > NUM_FULL_BUFFER_LIMIT) { pull_flush_soon(target); } } void thread_local_buffer::release(procid_t target, bool do_not_count_bytes_sent) { if (!do_not_count_bytes_sent) { bytes_sent[target] += current_archive[target].off - prev_acquire_archive_size - sizeof(packet_hdr); inc_calls_sent(target); } if (current_archive[target].off >= 
FULL_BUFFER_SIZE_LIMIT) { // shift the buffer into outbuf char* ptr = current_archive[target].buf; size_t len = current_archive[target].off; current_archive[target].buf = NULL; current_archive[target].off = 0; archive_locks[target].unlock(); add_to_queue(target, ptr, len); } else { archive_locks[target].unlock(); } } void thread_local_buffer::write(procid_t target, char* c, size_t len, bool do_not_count_bytes_sent) { if (!do_not_count_bytes_sent) { bytes_sent[target] += len; inc_calls_sent(target); } // make sure that messsages sent before this write are sent before this write if (current_archive[target].off) { archive_locks[target].lock(); if (current_archive[target].off) { add_to_queue(target, current_archive[target].buf, current_archive[target].off); } current_archive[target].buf = NULL; current_archive[target].off = 0; archive_locks[target].unlock(); } add_to_queue(target, c, len); } std::pair<buffer_elem*, buffer_elem*> thread_local_buffer::extract(procid_t target) { if (current_archive[target].off > 0 ) { if (archive_locks[target].try_lock()) { char* ptr = current_archive[target].buf; size_t len = current_archive[target].off; if (len > 0) { current_archive[target].buf = NULL; current_archive[target].off = 0; } archive_locks[target].unlock(); if (len > 0) { buffer_elem* elem = new buffer_elem; ASSERT_NE(ptr, NULL); elem->buf = ptr; elem->len = len; elem->next = NULL; outbuf[target]->enqueue(elem); } } } std::pair<buffer_elem*, buffer_elem*> ret; ret.first = outbuf[target]->dequeue_all(); if (ret.first != NULL) { ASSERT_NE(ret.first->buf, NULL); ret.second = outbuf[target]->end_of_dequeue_list(); return ret; } else { return std::pair<buffer_elem*, buffer_elem*>(NULL, NULL); } } } // dc_impl } // graphlab ================================================ FILE: src/graphlab/rpc/thread_local_send_buffer.hpp ================================================ #ifndef GRAPHLAB_RPC_THREAD_LOCAL_SEND_BUFFER_HPP #define GRAPHLAB_RPC_THREAD_LOCAL_SEND_BUFFER_HPP #include 
<graphlab/serialization/oarchive.hpp> #include <graphlab/rpc/dc_compile_parameters.hpp> #include <graphlab/rpc/dc_internal_types.hpp> #include <graphlab/util/dense_bitset.hpp> #include <graphlab/util/inplace_lf_queue2.hpp> namespace graphlab { class distributed_control; namespace dc_impl { struct thread_local_buffer { std::vector<inplace_lf_queue2<buffer_elem>* > outbuf; std::vector<size_t> bytes_sent; std::vector<mutex> archive_locks; std::vector<oarchive> current_archive; size_t prev_acquire_archive_size; procid_t procid; distributed_control* dc; thread_local_buffer(); ~thread_local_buffer(); /** * Must be called from within the thread owning this buffer. * Acquires a buffer to write to */ oarchive* acquire(procid_t target); inline size_t get_bytes_sent(procid_t target) { return bytes_sent[target]; } /** * Must be called from within the thread owning this buffer. * Releases a buffer previously acquired with acquire */ void release(procid_t target, bool do_not_count_bytes_sent); void write(procid_t target, char* c, size_t len, bool do_not_count_bytes_sent); /** * Must be called from within the thread owning this buffer. * Flushes the buffer to the sender. This should really only be used * when the thread is dying since this incurs a large performance penalty by * locking up the sender. */ void push_flush(); /** * Can be called anywhere. * Flushes the buffer to the sender. This function blocks until all * buffers have been flushed. Equivalent to calling distributed_control::flush() */ void pull_flush(); /** * Can be called anywhere. * Flushes the buffer to the sender. This function blocks until all * buffers have been flushed. Equivalent to calling distributed_control::flush() */ void pull_flush(procid_t p); /** * Can be called anywhere. * Flushes the buffer to the sender. This function requests a flush to happen * soon. Equivalent to calling distributed_control::flush() */ void pull_flush_soon(); /** * Can be called anywhere. * Flushes the buffer to the sender. 
This function requests a flush to happen * soon. Equivalent to calling distributed_control::flush() */ void pull_flush_soon(procid_t p); /** * Extracts the buffer going to a given target. * The first element of the pair points to the head of the linked list * The linked list ends when the pointer becomes the second element of * the pair. */ std::pair<buffer_elem*, buffer_elem*> extract(procid_t target); void inc_calls_sent(procid_t target); void add_to_queue(procid_t target, char* ptr, size_t len); }; } } #endif ================================================ FILE: src/graphlab/scheduler/CMakeLists.txt ================================================ project(GraphLab) ================================================ FILE: src/graphlab/scheduler/fifo_scheduler.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
*
 * For more about this software visit:
 *
 *      http://www.graphlab.ml.cmu.edu
 *
 */

#include <graphlab/scheduler/fifo_scheduler.hpp>
#include <graphlab/macros_def.hpp>

namespace graphlab {

  // Parses the scheduler options. Only the "multi" option (number of
  // queues per thread) is recognized; any other key is a fatal error.
  void fifo_scheduler::set_options(const graphlab_options& opts) {
    ncpus = opts.get_ncpus();
    std::vector<std::string> keys = opts.get_scheduler_args().get_option_keys();
    foreach(std::string opt, keys) {
      if (opt == "multi") {
        opts.get_scheduler_args().get_option("multi", multi);
      } else {
        logstream(LOG_FATAL) << "Unexpected Scheduler Option: " << opt << std::endl;
      }
    }
  }

  // Initializes the internal datastructures: one queue/lock pair per
  // (cpu, multi) slot (at least one), and the scheduled-vertex bitset.
  void fifo_scheduler::initialize_data_structures() {
    current_queue.resize(ncpus, 0);
    size_t nqueues = std::max(multi * current_queue.size(), size_t(1));
    queues.resize(nqueues);
    locks.resize(nqueues);
    vertex_is_scheduled.resize(num_vertices);
  }

  // multi defaults to 3 queues per thread; set_options() may override it
  // before the data structures are sized.
  fifo_scheduler::fifo_scheduler(size_t num_vertices,
                                 const graphlab_options& opts):
      multi(3), num_vertices(num_vertices) {
    ASSERT_GE(opts.get_ncpus(), 1);
    set_options(opts);
    initialize_data_structures();
  }

  // Resizes the vertex count and bitset; existing schedule is kept.
  void fifo_scheduler::set_num_vertices(const lvid_type numv) {
    num_vertices = numv;
    vertex_is_scheduled.resize(numv);
  }

  void fifo_scheduler::schedule(const lvid_type vid, double priority) {
    // Only enqueue if the vertex was not already marked as scheduled
    // (set_bit deduplicates repeated schedulings of the same vertex).
    if (vid < num_vertices && !vertex_is_scheduled.set_bit(vid)) {
      /* Pick the queue the task is put in pseudo-randomly. We do not
         care if this is corrupted by race conditions. Pick two random
         queues and use the one which currently has the smaller size. */
      // M.D. Mitzenmacher. The Power of Two Choices in Randomized
      // Load Balancing (1991)
      // http://www.eecs.harvard.edu/~michaelm/postscripts/mythesis.
      size_t idx = 0;
      if(queues.size() > 1) {
        // one random draw yields two independent queue indices r1, r2
        const uint32_t prod =
            random::fast_uniform(uint32_t(0),
                                 uint32_t(queues.size() * queues.size() - 1));
        const uint32_t r1 = prod / queues.size();
        const uint32_t r2 = prod % queues.size();
        idx = (queues[r1].size() < queues[r2].size()) ?
r1 : r2; } locks[idx].lock(); queues[idx].push_back(vid); locks[idx].unlock(); } } /** Get the next element in the queue */ sched_status::status_enum fifo_scheduler::get_next(const size_t cpuid, lvid_type& ret_vid) { /* Check all of my queues for a task */ // begin scanning from the machine's current queue size_t initial_idx = (current_queue[cpuid] % multi) + cpuid * multi; for(size_t i = 0; i < queues.size(); ++i) { const size_t idx = (initial_idx + i) % queues.size(); // increment the current queue as long as I am scanning with in the // queues owned by this machine current_queue[cpuid] += (i < multi); // pick up the lock bool good = false; locks[idx].lock(); while(!queues[idx].empty()) { // not empty, pop and verify ret_vid = queues[idx].front(); queues[idx].pop_front(); if (ret_vid < num_vertices) { good = vertex_is_scheduled.clear_bit(ret_vid); if (good) break; } } locks[idx].unlock(); // managed to retrieve a task if(good) { return sched_status::NEW_TASK; } } return sched_status::EMPTY; } // end of get_next_task bool fifo_scheduler::empty() { for (size_t i = 0;i < queues.size(); ++i) { if (!queues[i].empty()) return false; } return true; } } ================================================ FILE: src/graphlab/scheduler/fifo_scheduler.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_FIFO_SCHEDULER_HPP #define GRAPHLAB_FIFO_SCHEDULER_HPP #include <algorithm> #include <queue> #include <graphlab/graph/graph_basic_types.hpp> #include <graphlab/parallel/pthread_tools.hpp> #include <graphlab/parallel/atomic.hpp> #include <graphlab/util/random.hpp> #include <graphlab/scheduler/ischeduler.hpp> #include <graphlab/util/dense_bitset.hpp> #include <graphlab/options/graphlab_options.hpp> namespace graphlab { /** * \ingroup group_schedulers * * This class defines a multiple queue approximate fifo scheduler. * Each processor has its own in_queue which it puts new tasks in * and out_queue which it pulls tasks from. Once a processors * in_queue gets too large, the entire queue is placed at the end of * the shared master queue. Once a processors out queue is empty it * grabs the next out_queue from the master. */ class fifo_scheduler : public ischeduler { public: typedef std::deque<lvid_type> queue_type; private: // a bitset denoting if a vertex is scheduled dense_bitset vertex_is_scheduled; // a collection of FIFO queues std::vector<queue_type> queues; // a parallel datastructure to queues containing all the locks std::vector<padded_simple_spinlock> locks; // the index of the queue currently accessed by a given CPU // when used, this is modded so that it ranges from 0 to multi - 1 std::vector<size_t> current_queue; // the number of CPUs size_t ncpus; // The queue to CPU ratio size_t multi; // the number of vertices in the graph size_t num_vertices; void set_options(const graphlab_options& opts); // Initializes the internal datastructures void initialize_data_structures(); public: fifo_scheduler(size_t num_vertices, const graphlab_options& opts); void set_num_vertices(const lvid_type numv); void schedule(const lvid_type vid, double priority = 1 /* ignored */ ); /** Get the next element in the queue */ sched_status::status_enum get_next(const size_t cpuid, lvid_type& 
ret_vid); bool empty(); static void print_options_help(std::ostream& out) { out << "\t multi = [number of queues per thread. Default = 3].\n"; } }; } // end of namespace graphlab #endif ================================================ FILE: src/graphlab/scheduler/get_message_priority.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SCHEDULER_GET_MESSAGE_PRIORITY_HPP #define GRAPHLAB_SCHEDULER_GET_MESSAGE_PRIORITY_HPP #include <boost/type_traits.hpp> #include <typeinfo> namespace graphlab { namespace scheduler_impl { template <typename T> struct implements_priority_member { template<typename U, double (U::*)() const> struct SFINAE {}; template <typename U> static char test(SFINAE<U, &U::priority>*); template <typename U> static int test(...); static const bool value = (sizeof(test<T>(0)) == sizeof(char)); }; template <typename MessageType> typename boost::enable_if_c<implements_priority_member<MessageType>::value, double>::type get_message_priority(const MessageType &m) { return m.priority(); } template <typename MessageType> typename boost::disable_if_c<implements_priority_member<MessageType>::value, double>::type get_message_priority(const MessageType &m) { return 1.0; } } //namespace scheduler_impl } //namespace graphlab #endif 
================================================
FILE: src/graphlab/scheduler/ischeduler.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

/**
 * Also contains code that is Copyright 2011 Yahoo! Inc.  All rights
 * reserved.
 *
 * Contributed under the iCLA for:
 *    Joseph Gonzalez (jegonzal@yahoo-inc.com)
 *
 */

#ifndef GRAPHLAB_ISCHEDULER_HPP
#define GRAPHLAB_ISCHEDULER_HPP

#include <vector>
#include <sstream>
#include <ostream>
#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/options/graphlab_options.hpp>

namespace graphlab {

  /**
   * This is an enumeration for the possible return values for
   * get_next_tasks
   */
  struct sched_status {
    /// \brief the possible scheduler status.
    enum status_enum {
      NEW_TASK,      /**< The get_next_tasks function returned a new task
                          to be executed */
      EMPTY,         /**< The schedule is empty. */
    };
  };

  /**
   * \ingroup group_schedulers
   *
   * This describes the interface/concept for a scheduler.
   * The scheduler allows vertices to be scheduled, but deduplicates
   * repeated schedulings of the same vertex. The only guarantee is that
   * if a vertex is scheduled, the vertex will be popped at some point in
   * the future.
   * Note that all functions (with the exception of the
   * constructor and destructor and set_num_vertices()) must be thread-safe.
   */
  class ischeduler {
  public:

    /// destructor
    virtual ~ischeduler() {};

    /** Sets the number of vertices in the graph. Existing schedule
     * will not be cleared. Scheduler will not return a vertex ID
     * exceeding the number of vertices.
     * NOT required to be thread-safe (see class comment).
     */
    virtual void set_num_vertices(const lvid_type numv) = 0;

    /**
     * Adds vertex vid to the schedule. The new priority is the priority value.
     * Implementations that do not support priorities may ignore the argument.
     */
    virtual void schedule(const lvid_type vid, double priority = 1) = 0;

    /**
     * This function is called by the engine to ask for the next
     * vertex to process. The vertex is
     * returned in ret_msg and ret_vid respectively.
     *
     *  \retval NEWTASK There is a new message to process
     *  \retval EMPTY There are no messages to process
     */
    virtual sched_status::status_enum
    get_next(const size_t cpuid, lvid_type& ret_vid) = 0;

    /// returns true if the scheduler is empty. Need not be consistent.
    virtual bool empty() = 0;

    /**
     * Print a help string describing the options that this scheduler
     * accepts.  Base implementation prints nothing; subclasses shadow
     * this static with their own option list.
     */
    static void print_options_help(std::ostream& out) { };
  };

}
#endif

================================================
FILE: src/graphlab/scheduler/priority_scheduler.cpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#include <graphlab/scheduler/priority_scheduler.hpp>
#include <graphlab/macros_def.hpp>
namespace graphlab {

  // Parses scheduler options. Accepted keys: "multi" (queues per thread)
  // and "min_priority" (messages below this priority are never returned).
  // Any other key is a fatal error. Also caches ncpus from opts.
  void priority_scheduler::set_options(const graphlab_options& opts) {
    ncpus = opts.get_ncpus();
    std::vector<std::string> keys = opts.get_scheduler_args().get_option_keys();
    foreach(std::string opt, keys) {
      if (opt == "multi") {
        opts.get_scheduler_args().get_option("multi", multi);
      } else if (opt == "min_priority") {
        opts.get_scheduler_args().get_option("min_priority", min_priority);
      } else {
        logstream(LOG_FATAL) << "Unexpected Scheduler Option: " << opt << std::endl;
      }
    }
  }

  // Initializes the internal datastructures.
  // Allocates multi * ncpus queues (at least 1) with one spinlock each,
  // and sizes the dedup bitset to the vertex count.
  void priority_scheduler::initialize_data_structures() {
    current_queue.resize(ncpus, 0);
    size_t nqueues = std::max(multi * current_queue.size(), size_t(1));
    queues.resize(nqueues);
    locks.resize(nqueues);
    vertex_is_scheduled.resize(num_vertices);
  }

  // Defaults: multi = 3, min_priority = -DBL_MAX (i.e. accept everything).
  priority_scheduler::priority_scheduler(size_t num_vertices,
                                         const graphlab_options& opts) :
      multi(3),
      min_priority(-std::numeric_limits<double>::max()),
      num_vertices(num_vertices) {
    ASSERT_GE(opts.get_ncpus(), 1);
    set_options(opts);
    initialize_data_structures();
  }

  // Not thread-safe (per ischeduler contract). Existing schedule is kept.
  void priority_scheduler::set_num_vertices(const lvid_type numv) {
    num_vertices = numv;
    vertex_is_scheduled.resize(numv);
  }

  void priority_scheduler::schedule(const lvid_type vid, double priority) {
    // set_bit returns the previous value, so the body runs only the first
    // time a currently-unscheduled vertex is scheduled (deduplication).
    if (vid < num_vertices && !vertex_is_scheduled.set_bit(vid)) {
      /* "Randomize" the task queue task is put in. Note that we do
         not care if this counter is corrupted in race conditions
         Find first queue that is not locked and put task there (or
         after iteration limit) Choose two random queues and use the
         one which has smaller size */
      // M.D. Mitzenmacher The Power of Two Choices in Randomized
      // Load Balancing (1991)
      // http://www.eecs.harvard.edu/~michaelm/postscripts/mythesis.pdf
      size_t idx = 0;
      if(queues.size() > 1) {
        // One random draw yields two independent queue indices r1, r2.
        // NOTE(review): queues.size()^2 - 1 is computed in uint32_t, so
        // this presumably assumes fewer than 2^16 queues — verify if the
        // queue count can ever be that large.
        const uint32_t prod =
            random::fast_uniform(uint32_t(0),
                                 uint32_t(queues.size() * queues.size() - 1));
        const uint32_t r1 = prod / queues.size();
        const uint32_t r2 = prod % queues.size();
        // power of two choices: prefer the shorter queue
        idx = (queues[r1].size() < queues[r2].size()) ? r1 : r2;
      }
      locks[idx].lock();
      queues[idx].push_or_update(vid, priority);
      locks[idx].unlock();
    }
  }

  /** Get the next element in the queue */
  sched_status::status_enum priority_scheduler::get_next(const size_t cpuid,
                                                         lvid_type& ret_vid) {
    /* Check all of my queues for a task */
    // begin scanning from the machine's current queue
    size_t initial_idx = (current_queue[cpuid] % multi) + cpuid * multi;
    for(size_t i = 0; i < queues.size(); ++i) {
      const size_t idx = (initial_idx + i) % queues.size();
      // increment the current queue as long as I am scanning with in the
      // queues owned by this machine
      current_queue[cpuid] += (i < multi);
      // pick up the lock
      bool good = false;
      locks[idx].lock();
      // Only consider entries whose priority meets min_priority; since
      // the queue is ordered by priority, stop as soon as top() is below.
      while(!queues[idx].empty() &&
            queues[idx].top().second >= min_priority) {
        // not empty, pop and verify
        ret_vid = queues[idx].pop().first;
        if (ret_vid < num_vertices) {
          // clear_bit returns true only for the thread that actually
          // clears the scheduled flag, so a vertex is handed out once.
          good = vertex_is_scheduled.clear_bit(ret_vid);
          if (good) break;
        } else continue; // stale entry beyond current vertex count; drop it
      }
      locks[idx].unlock();
      // managed to retrieve a task
      if(good) { return sched_status::NEW_TASK; }
    }
    return sched_status::EMPTY;
  } // end of get_next_task

  // Advisory emptiness check: scans all queues without locking, so the
  // answer may be stale under concurrent schedule()/get_next() calls.
  bool priority_scheduler::empty() {
    for (size_t i = 0;i < queues.size(); ++i) {
      if (!queues[i].empty() &&
          queues[i].top().second >= min_priority) { return false; }
    }
    return true;
  }

}

================================================
FILE: src/graphlab/scheduler/priority_scheduler.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_PRIORITY_SCHEDULER_HPP
#define GRAPHLAB_PRIORITY_SCHEDULER_HPP

#include <algorithm>
#include <queue>

#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/parallel/atomic.hpp>
#include <graphlab/util/random.hpp>
#include <graphlab/util/mutable_queue.hpp>
#include <graphlab/scheduler/ischeduler.hpp>
#include <graphlab/util/dense_bitset.hpp>
#include <graphlab/options/graphlab_options.hpp>
#include <graphlab/macros_def.hpp>

namespace graphlab {

  /**
   * \ingroup group_schedulers
   *
   * This class defines a multiple queue approximate priority scheduler.
   * Each processor has its own in_queue which it puts new tasks in
   * and out_queue which it pulls tasks from. Once a processors
   * in_queue gets too large, the entire queue is placed at the end of
   * the shared master queue. Once a processors out queue is empty it
   * grabs the next out_queue from the master.
   */
  class priority_scheduler : public ischeduler {
  public:
    /// Priority queue keyed by vertex ID with double priorities;
    /// push_or_update keeps one entry per vertex.
    typedef mutable_queue<lvid_type, double> queue_type;

  private:
    // a bitset denoting if a vertex is scheduled
    dense_bitset vertex_is_scheduled;
    // a collection of priority queues
    std::vector<queue_type> queues;
    // a parallel datastructure to queues containing all the locks
    std::vector<padded_simple_spinlock> locks;
    // the index of the queue currently accessed by a given CPU
    // when used, this is modded so that it ranges from 0 to multi - 1
    std::vector<size_t> current_queue;
    // the number of CPUs
    size_t ncpus;
    // The queue to CPU ratio
    size_t multi;
    // entries with priority below this threshold are never returned
    double min_priority;
    // the number of vertices in the graph
    size_t num_vertices;

    /// Parses "multi" and "min_priority" out of \p opts.
    void set_options(const graphlab_options& opts);

    // Initializes the internal datastructures
    void initialize_data_structures();

  public:
    priority_scheduler(size_t num_vertices, const graphlab_options& opts);

    /// Resizes to \p numv vertices; existing schedule is kept.
    void set_num_vertices(const lvid_type numv);

    /// Schedules vid with the given priority (re-scheduling updates it).
    void schedule(const lvid_type vid, double priority = 1);

    /** Get the next element in the queue */
    sched_status::status_enum get_next(const size_t cpuid, lvid_type& ret_vid);

    /// True if no eligible task is queued. Need not be consistent.
    bool empty();

    static void print_options_help(std::ostream& out) {
      out << "\t multi = [number of queues per thread. Default = 3].\n"
          << "min_priority = [double, minimum priority required to receive \n"
          << "\t a message, default = -inf]\n";
    }
  };

} // end of namespace graphlab
#include <graphlab/macros_undef.hpp>
#endif

================================================
FILE: src/graphlab/scheduler/queued_fifo_scheduler.cpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#include <graphlab/scheduler/queued_fifo_scheduler.hpp>
#include <graphlab/macros_def.hpp>
namespace graphlab {

  // Parses scheduler options. Accepted keys: "queuesize" (in-queue size at
  // which a sub-queue is pushed to the master) and "multi" (in-queues per
  // thread). Any other key is a fatal error.
  void queued_fifo_scheduler::set_options(const graphlab_options& opts) {
    // read the remaining options.
    std::vector<std::string> keys = opts.get_scheduler_args().get_option_keys();
    foreach(std::string opt, keys) {
      if (opt == "queuesize") {
        opts.get_scheduler_args().get_option("queuesize", sub_queue_size);
      } else if (opt == "multi") {
        opts.get_scheduler_args().get_option("multi", multi);
      } else {
        logstream(LOG_FATAL) << "Unexpected Scheduler Option: " << opt << std::endl;
      }
    }
  }

  // Allocates ncpus*multi in-queues (with locks) and one out-queue (with
  // lock) per CPU; sizes the dedup bitset to the vertex count.
  void queued_fifo_scheduler::initialize_data_structures() {
    ASSERT_GT(ncpus * multi, 1);
    in_queues.resize(ncpus * multi);
    in_queue_locks.resize(ncpus * multi);
    out_queue_locks.resize(ncpus);
    out_queues.resize(ncpus);
    vertex_is_scheduled.resize(num_vertices);
  }

  // Defaults: multi = 3 in-queues per thread, sub_queue_size = 100.
  queued_fifo_scheduler::queued_fifo_scheduler(size_t num_vertices,
                                               const graphlab_options& opts) :
    ncpus(opts.get_ncpus()),
    num_vertices(num_vertices),
    multi(3),
    sub_queue_size(100) {
    ASSERT_GE(opts.get_ncpus(), 1);
    set_options(opts);
    initialize_data_structures();
  }

  // Not thread-safe (per ischeduler contract). Existing schedule is kept.
  void queued_fifo_scheduler::set_num_vertices(const lvid_type numv) {
    num_vertices = numv;
    vertex_is_scheduled.resize(numv);
  }

  void queued_fifo_scheduler::schedule(const lvid_type vid, double priority) {
    // If this is a new message, schedule it
    // the min priority will be taken care of by the get_next function
    // (set_bit returns the previous value, so only the first scheduling
    // of an unscheduled vertex enqueues it)
    if (vid < num_vertices && !vertex_is_scheduled.set_bit(vid)) {
      // pick a random in-queue to spread contention
      const size_t cpuid =
          random::fast_uniform(size_t(0), in_queues.size() - 1);
      in_queue_locks[cpuid].lock();
      queue_type& queue = in_queues[cpuid];
      queue.push_back(vid);
      // when the in-queue overflows, hand the whole sub-queue to the
      // shared master queue (swap with a fresh empty deque)
      if(queue.size() > sub_queue_size) {
        master_lock.lock();
        queue_type emptyq;
        master_queue.push_back(emptyq);
        master_queue.back().swap(queue);
        master_lock.unlock();
      }
      in_queue_locks[cpuid].unlock();
    }
  } // end of schedule

  /** Get the next element in the queue */
  sched_status::status_enum queued_fifo_scheduler::get_next(const size_t cpuid,
                                                            lvid_type& ret_vid) {
    queue_type& myqueue = out_queues[cpuid];
    // if the local queue is empty try to get a queue from the master
    out_queue_locks[cpuid].lock();
    if(myqueue.empty()) {
      master_lock.lock();
      // if master queue is empty...
      if (!master_queue.empty()) {
        myqueue.swap(master_queue.front());
        master_queue.pop_front();
        master_lock.unlock();
      } else {
        master_lock.unlock();
        //try to steal from the inqueues, starting with this CPU's own
        for (size_t i = 0; i < in_queues.size(); ++i) {
          size_t idx = (i + multi * cpuid) % in_queues.size();
          // unlocked peek first to avoid taking every lock on the scan
          if (!in_queues[idx].empty()) {
            in_queue_locks[idx].lock();
            // double check under the lock before swapping
            if(!in_queues[idx].empty()) {
              myqueue.swap(in_queues[idx]);
            }
            in_queue_locks[idx].unlock();
            if (!myqueue.empty()) break;
          }
        }
      }
    }
    // end of get next
    bool good = false;
    while(!myqueue.empty()) {
      // not empty, pop and verify
      ret_vid = myqueue.front();
      myqueue.pop_front();
      if (ret_vid < num_vertices) {
        // clear_bit returns true only for the thread that actually clears
        // the flag, so each scheduled vertex is handed out exactly once
        good = vertex_is_scheduled.clear_bit(ret_vid);
        if (good) break;
      }
    }
    out_queue_locks[cpuid].unlock();
    if(good) {
      return sched_status::NEW_TASK;
    } else {
      return sched_status::EMPTY;
    }
  } // end of get_next_task

  // Advisory emptiness check over out-queues, master queue and in-queues;
  // performed without locks, so the answer may be stale.
  bool queued_fifo_scheduler::empty() {
    for (size_t i = 0;i < out_queues.size(); ++i) {
      if (!out_queues[i].empty()) return false;
    }
    if (!master_queue.empty()) return false;
    for (size_t i = 0;i < in_queues.size(); ++i) {
      if (!in_queues[i].empty()) return false;
    }
    return true;
  }

} // namespace graphlab

================================================
FILE: src/graphlab/scheduler/queued_fifo_scheduler.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_QUEUED_FIFO_SCHEDULER_HPP
#define GRAPHLAB_QUEUED_FIFO_SCHEDULER_HPP

#include <algorithm>
#include <queue>

#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/parallel/pthread_tools.hpp>
#include <graphlab/parallel/atomic.hpp>
#include <graphlab/util/dense_bitset.hpp>
#include <graphlab/util/random.hpp>
#include <graphlab/scheduler/ischeduler.hpp>
#include <graphlab/options/graphlab_options.hpp>
#include <graphlab/macros_def.hpp>

namespace graphlab {

  /**
   * \ingroup group_schedulers
   *
   * This class defines a multiple queue approximate fifo scheduler.
   * Each processor has its own in_queue which it puts new tasks in
   * and out_queue which it pulls tasks from. Once a processors
   * in_queue gets too large, the entire queue is placed at the end of
   * the shared master queue. Once a processors out queue is empty it
   * grabs the next out_queue from the master.
   */
  class queued_fifo_scheduler: public ischeduler {
  public:
    /// Sub-queues and the per-CPU out-queues are plain FIFO deques.
    typedef std::deque<lvid_type> queue_type;

  private:
    // number of worker CPUs
    size_t ncpus;
    // number of vertices in the graph
    size_t num_vertices;
    // number of in-queues per CPU
    size_t multi;
    // dedup bitset: one bit per vertex, set while the vertex is scheduled
    dense_bitset vertex_is_scheduled;
    // shared queue of full sub-queues, guarded by master_lock
    std::deque<queue_type> master_queue;
    mutex master_lock;
    // in-queue size threshold at which a sub-queue moves to the master
    size_t sub_queue_size;
    // producer-side queues (ncpus * multi of them), one lock each
    std::vector<queue_type> in_queues;
    std::vector<mutex> in_queue_locks;
    // consumer-side queues, one per CPU, one lock each
    std::vector<queue_type> out_queues;
    std::vector<mutex> out_queue_locks;

    /// Parses "queuesize" and "multi" out of \p opts.
    void set_options(const graphlab_options& opts);

    /// Initializes the internal datastructures.
    void initialize_data_structures();

  public:
    queued_fifo_scheduler(size_t num_vertices, const graphlab_options& opts);

    /// Resizes to \p numv vertices; existing schedule is kept.
    void set_num_vertices(const lvid_type numv);

    /// Schedules vid; priority is accepted for interface compatibility
    /// only and ignored by this FIFO scheduler.
    void schedule(const lvid_type vid, double priority = 1 /* ignored */);

    /** Get the next element in the queue */
    sched_status::status_enum get_next(const size_t cpuid, lvid_type& ret_vid);

    /// True if all queues appear empty. Need not be consistent.
    bool empty();

    /**
     * Print a help string describing the options that this scheduler
     * accepts.
     */
    static void print_options_help(std::ostream& out) {
      out << "\t queuesize: [the size at which a subqueue is "
          << "placed in the master queue. default = 100]\n";
      out << "\t multi = [number of queues per thread. Default = 3].\n";
    }
  };

} // end of namespace graphlab
#include <graphlab/macros_undef.hpp>
#endif

================================================
FILE: src/graphlab/scheduler/scheduler_factory.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_SCHEDULER_FACTORY_HPP
#define GRAPHLAB_SCHEDULER_FACTORY_HPP

#include <string>

// Schedulers
#include <graphlab/options/graphlab_options.hpp>
#include <graphlab/scheduler/scheduler_list.hpp>

#include <boost/preprocessor.hpp>

namespace graphlab {

  /**
   * helper for constructing graphlab engines.
   * Maps the scheduler name in the options to one of the scheduler
   * classes registered in __SCHEDULER_LIST__ (scheduler_list.hpp).
   **/
  struct scheduler_factory {

    typedef ischeduler ischeduler_type;

    /**
     * Construct the a scheduler.
     * Heap-allocates a Scheduler; the caller owns the returned pointer.
     */
    template<typename Scheduler>
    static ischeduler_type*
    new_scheduler_impl(size_t num_vertices, const graphlab_options& opts) {
      ischeduler_type* scheduler_ptr = new Scheduler(num_vertices, opts);
      ASSERT_TRUE(scheduler_ptr != NULL);
      return scheduler_ptr;
    } // end of new_scheduler

    /**
     * This function returns a new scheduler for a particular engine.
     * The if/else dispatch over scheduler names is generated at compile
     * time from __SCHEDULER_LIST__ via Boost.Preprocessor; an unknown
     * name is a fatal error.
     */
    static ischeduler_type*
    new_scheduler(size_t num_vertices, const graphlab_options& opts) {
      std::string scheduler_str = opts.get_scheduler_type();
#define __GENERATE_NEW_SCHEDULER__(r_unused, data_unused, i, elem)      \
      BOOST_PP_EXPR_IF(i, else)                                         \
      if (scheduler_str == BOOST_PP_TUPLE_ELEM(3,0,elem)) {             \
        typedef BOOST_PP_TUPLE_ELEM(3,1,elem)                           \
          scheduler_type;                                               \
        return new_scheduler_impl<scheduler_type>                       \
          ( num_vertices, opts);                                        \
      }
      // generate the construction calls
      BOOST_PP_SEQ_FOR_EACH_I(__GENERATE_NEW_SCHEDULER__, _, __SCHEDULER_LIST__);
#undef __GENERATE_NEW_SCHEDULER__
      logstream(LOG_FATAL) << "Invalid scheduler type: "
                           << scheduler_str << std::endl;
      return NULL;
    } // end of new_scheduler

  }; // end of class scheduler_factory

}; // End of namespace graphlab
#endif

================================================
FILE: src/graphlab/scheduler/scheduler_includes.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

// Convenience umbrella header: pulls in every scheduler implementation
// together with the interface, factory and registry headers.
#ifndef GRAPHLAB_SCHEDULER_INCLUDES_HPP
#define GRAPHLAB_SCHEDULER_INCLUDES_HPP

#include <graphlab/scheduler/fifo_scheduler.hpp>
#include <graphlab/scheduler/get_message_priority.hpp>
#include <graphlab/scheduler/ischeduler.hpp>
#include <graphlab/scheduler/priority_scheduler.hpp>
#include <graphlab/scheduler/queued_fifo_scheduler.hpp>
#include <graphlab/scheduler/scheduler_factory.hpp>
#include <graphlab/scheduler/scheduler_list.hpp>

#endif

================================================
FILE: src/graphlab/scheduler/scheduler_list.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <algorithm> #include <graphlab/scheduler/scheduler_list.hpp> #include <graphlab/util/stl_util.hpp> namespace graphlab { std::vector<std::string> get_scheduler_names() { std::vector<std::string> ret; #define __APPEND_TO_RET__(r_unused, data_unused, i, elem) \ ret.push_back(BOOST_PP_TUPLE_ELEM(3,0,elem)); BOOST_PP_SEQ_FOR_EACH_I(__APPEND_TO_RET__, _, __SCHEDULER_LIST__) #undef __APPEND_TO_RET__ return ret; } std::string get_scheduler_names_str() { std::string ret; std::vector<std::string> schednames; schednames = get_scheduler_names(); for (size_t i = 0; i < schednames.size(); ++i) { if (i > 0) { ret = ret + ", "; } ret = ret + schednames[i]; } return ret; } static std::string add_line_breaks(const std::string &s, size_t numcols) { size_t pos = 0; std::string ret; while(pos < s.length() - 1) { size_t oldpos = pos; pos = std::min(pos + numcols, s.length()); size_t newpos = pos; // search backward for a space if we are not at the end of the // string if (pos < s.length()) { newpos = s.rfind(" ", pos); } // if we get back to the old position, or we fail to find a // space, force the break if (newpos == std::string::npos || newpos == oldpos) { newpos = pos; } // break ret = ret + trim(s.substr(oldpos, newpos - oldpos)) + "\n"; pos = newpos; } return ret; } void print_scheduler_info(std::string s, std::ostream &out) { typedef char dummy_message_type; // this is annoying... 
I need to instantiate the graph<char, char> type to // even call the scheduler #define __GENERATE_SCHEDULER_HELP__(r_unused, data_unused, i, elem) \ BOOST_PP_EXPR_IF(i, else) if (s == BOOST_PP_TUPLE_ELEM(3,0,elem)) { \ out << "\n"; \ out << BOOST_PP_TUPLE_ELEM(3,0,elem) << " scheduler\n"; \ out << std::string(50, '-') << std::endl; \ out << add_line_breaks(BOOST_PP_TUPLE_ELEM(3,2,elem), 50) << "\n" \ << "Options: \n"; \ BOOST_PP_TUPLE_ELEM(3,1,elem) \ ::print_options_help(out); \ } /* * if (scheduler == "sweep") { * sweep_scheduler<graph<char,char> >::print_options_help(out); * } * ... */ // generate the construction calls BOOST_PP_SEQ_FOR_EACH_I(__GENERATE_SCHEDULER_HELP__, _, __SCHEDULER_LIST__) else { out << "Scheduler " << s << " not found" << "\n"; } #undef __GENERATE_SCHEDULER_HELP__ } // end of print scheduler info } // end of namespace graphlab ================================================ FILE: src/graphlab/scheduler/scheduler_list.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_SCHEDULER_LIST_HPP
#define GRAPHLAB_SCHEDULER_LIST_HPP
#include <string>
#include <vector>
#include <iostream>
#include <boost/preprocessor.hpp>

// Central scheduler registry: a Boost.Preprocessor sequence of
// (name, class, description) tuples. scheduler_factory and
// scheduler_list.cpp expand this sequence to generate their dispatch
// code, so adding a scheduler here registers it everywhere.
#define __SCHEDULER_LIST__                                              \
  (("fifo", fifo_scheduler,                                             \
    "Standard FIFO task queue, poor parallelism, but task evaluation "  \
    "sequence is highly predictable. "                                  \
    "Useful for debugging and testing."))                               \
  (("sweep", sweep_scheduler,                                           \
    "very fast dynamic scheduler. Scans all vertices in sequence, "     \
    "running all update tasks on each vertex evaluated."))              \
  (("priority", priority_scheduler,                                     \
    "Standard Priority queue, poor parallelism, but task evaluation "   \
    "sequence is highly predictable. Useful for debugging"))            \
  (("queued_fifo", queued_fifo_scheduler,                               \
    "This scheduler maintains a shared FIFO queue of FIFO queues. "     \
    "Each thread maintains its own smaller in and out queues. When a "  \
    "threads out queue is too large (greater than \"queuesize\") then " \
    "the thread puts its out queue at the end of the master queue."))

#include <graphlab/scheduler/fifo_scheduler.hpp>
#include <graphlab/scheduler/sweep_scheduler.hpp>
#include <graphlab/scheduler/priority_scheduler.hpp>
#include <graphlab/scheduler/queued_fifo_scheduler.hpp>

namespace graphlab {
  /// get all the scheduler names
  std::vector<std::string> get_scheduler_names();

  /// get all the scheduler names concated into a string
  std::string get_scheduler_names_str();

  /// Display the scheduler options for a particular scheduler
  void print_scheduler_info(std::string s, std::ostream &out);
}

#endif

================================================
FILE: src/graphlab/scheduler/sweep_scheduler.cpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#include <graphlab/scheduler/sweep_scheduler.hpp>
#include <graphlab/macros_def.hpp>
namespace graphlab {

  // Parses scheduler options. Accepted keys: "order" ("random" or
  // "ascending"), "strict" (strict round-robin flag) and
  // "max_iterations" (only meaningful with strict=true, enforced below).
  void sweep_scheduler::set_options(const graphlab_options& opts) {
    std::vector<std::string> keys = opts.get_scheduler_args().get_option_keys();
    bool max_iter_set = false;
    foreach(std::string opt, keys) {
      if (opt == "order") {
        opts.get_scheduler_args().get_option("order", ordering);
        ASSERT_TRUE(ordering == "random" || ordering == "ascending");
      } else if (opt == "strict") {
        opts.get_scheduler_args().get_option("strict", strict_round_robin);
      } else if (opt == "max_iterations") {
        opts.get_scheduler_args().get_option("max_iterations", max_iterations);
        max_iter_set = true;
      } else {
        logstream(LOG_FATAL) << "Unexpected Scheduler Option: " << opt << std::endl;
      }
    }
    if (max_iter_set) {
      ASSERT_MSG(strict_round_robin,
                 "sweep_scheduler: \"strict\" must be set with \"max_iteration\"");
    }
  }

  // Defaults: strict round-robin, unlimited iterations, random order.
  sweep_scheduler::sweep_scheduler(size_t num_vertices,
                                   const graphlab_options& opts)
      : ncpus(opts.get_ncpus()),
        num_vertices(num_vertices),
        strict_round_robin(true),
        max_iterations(std::numeric_limits<size_t>::max()),
        vertex_is_scheduled(num_vertices) {
    // initialize defaults
    ASSERT_GE(opts.get_ncpus(), 1);
    ordering = "random";
    set_options(opts);
    // The "random" order is realized by striding through vertex IDs with
    // a fixed large multiplier (mod num_vertices); "ascending" uses 1.
    if (ordering == "ascending") {
      randomizer = 1;
    } else if(ordering == "random") {
      randomizer = 1500450271;
    }
    if(strict_round_robin) {
      logstream(LOG_INFO) << "Using a strict round robin schedule."
                          << std::endl;
      // Max iterations only applies to strict round robin
      if(max_iterations != std::numeric_limits<size_t>::max()) {
        logstream(LOG_INFO) << "Using maximum iterations: "
                            << max_iterations << std::endl;
      }
      rr_index = 0;
    } else {
      // each cpu is responsible for its own subset of vertices
      // Initialize the cpu2index counters
      cpu2index.resize(ncpus);
      for(size_t i = 0; i < cpu2index.size(); ++i) cpu2index[i] = i;
    }
    vertex_is_scheduled.resize(num_vertices);
  } // end of constructor

  // Not thread-safe (per ischeduler contract). Existing schedule is kept.
  void sweep_scheduler::set_num_vertices(const lvid_type numv) {
    num_vertices = numv;
    vertex_is_scheduled.resize(numv);
  }

  // Scheduling is just setting the vertex's bit; the sweep discovers it.
  void sweep_scheduler::schedule(const lvid_type vid, double priority) {
    if (vid < num_vertices) vertex_is_scheduled.set_bit(vid);
  }

  sched_status::status_enum sweep_scheduler::get_next(const size_t cpuid,
                                                      lvid_type& ret_vid) {
    // Give up after roughly one full share of the vertex range per call.
    const size_t max_fails = (num_vertices/ncpus) + 1;
    // Check to see if max iterations have been achieved
    if(strict_round_robin && (rr_index / num_vertices) >= max_iterations)
      return sched_status::EMPTY;
    // Loop through all vertices that are associated with this
    // processor searching for a vertex with an active task
    for(size_t idx = get_and_inc_index(cpuid), fails = 0;
        fails <= max_fails; //
        idx = get_and_inc_index(cpuid), ++fails) {
      // It is possible that the get_and_inc_index could return an
      // invalid index if the number of cpus exceeds the number of
      // vertices. In this case we always return empty.
      if(__builtin_expect(idx >= num_vertices, false))
        return sched_status::EMPTY;
      // permute the sweep order via the randomizer stride
      const lvid_type vid = (idx * randomizer) % num_vertices;
      // clear_bit returns true only if this thread cleared the bit, so
      // each scheduled vertex is claimed by exactly one caller
      bool success = vertex_is_scheduled.clear_bit(vid);
      while(success) {
        // Job found now decide whether to keep it
        // (this while acts as an "if": it always returns on entry)
        ret_vid = vid;
        return sched_status::NEW_TASK;
      }
    } // end of for loop
    return sched_status::EMPTY;
  } // end of get_next

}

================================================
FILE: src/graphlab/scheduler/sweep_scheduler.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 *     All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 *     http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_SWEEP_SCHEDULER_HPP
#define GRAPHLAB_SWEEP_SCHEDULER_HPP

#include <queue>
#include <cmath>
#include <cassert>

#include <graphlab/graph/graph_basic_types.hpp>
#include <graphlab/scheduler/ischeduler.hpp>
#include <graphlab/util/dense_bitset.hpp>
#include <graphlab/parallel/atomic.hpp>
#include <graphlab/options/graphlab_options.hpp>

#include <graphlab/macros_def.hpp>
namespace graphlab {

  /**
   * \ingroup group_schedulers
   *
   * Sweep scheduler: the schedule is a single bit per vertex; get_next
   * sweeps over the (optionally permuted) vertex range claiming set bits.
   */
  class sweep_scheduler: public ischeduler {
  private:
    // number of worker CPUs
    size_t ncpus;
    // number of vertices in the graph
    size_t num_vertices;
    // true: all CPUs share one global round-robin counter (rr_index);
    // false: each CPU strides over its own subset via cpu2index
    bool strict_round_robin;
    // shared sweep position for strict round robin
    atomic<size_t> rr_index;
    // iteration cap; only enforced when strict_round_robin is true
    size_t max_iterations;
    // multiplicative stride used to permute the sweep order
    // (1 = ascending)
    size_t randomizer;
    // per-CPU sweep position for the non-strict mode
    std::vector<lvid_type> cpu2index;
    // one bit per vertex: set = scheduled, cleared when claimed
    dense_bitset vertex_is_scheduled;
    // "random" or "ascending"
    std::string ordering;

    /// Parses "order", "strict" and "max_iterations" out of \p opts.
    void set_options(const graphlab_options& opts);

  public:
    sweep_scheduler(size_t num_vertices, const graphlab_options& opts);

    /// Resizes to \p numv vertices; existing schedule is kept.
    void set_num_vertices(const lvid_type numv);

    /// Schedules vid by setting its bit; priority is ignored.
    void schedule(const lvid_type vid, double priority = 1 /* ignored */) ;

    sched_status::status_enum get_next(const size_t cpuid, lvid_type& ret_vid);

    static void print_options_help(std::ostream &out) {
      out << "order = [string: {random, ascending} default=random]\n"
          << "strict = [bool, use strict round robin schedule, default=true]\n"
          << "max_iterations = [integer, maximum number of iterations "
          << " (requires strict=true) \n"
          << "\t default = inf]\n";
    } // end of print_options_help

    /// True iff no vertex bit is set. Need not be consistent.
    bool empty() { return (vertex_is_scheduled.popcount() == 0); }

  private:
    // Returns this CPU's next sweep index and advances the cursor.
    inline size_t get_and_inc_index(const size_t cpuid) {
      if (strict_round_robin) {
        return rr_index++ % num_vertices;
      } else {
        const size_t index = cpu2index[cpuid];
        cpu2index[cpuid] += ncpus;
        // Address loop around
        if (__builtin_expect(cpu2index[cpuid] >= num_vertices, false))
          cpu2index[cpuid] = cpuid;
        return index;
      }
    }// end of next index
  };

} // end of namespace graphlab
#include <graphlab/macros_undef.hpp>
#endif

================================================
FILE:
src/graphlab/serialization/CMakeLists.txt
================================================
project(GraphLab)

================================================
FILE: src/graphlab/serialization/basic_types.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

/* This file defines the serializer/deserializer for all basic types
   (as well as string and pair) */
#ifndef ARCHIVE_BASIC_TYPES_HPP
#define ARCHIVE_BASIC_TYPES_HPP

#include <string>
#include <graphlab/serialization/serializable_pod.hpp>
#include <graphlab/logger/assertions.hpp>
#include <stdint.h>
// NOTE(review): strlen() is used below but <cstring> is not included
// directly; presumably pulled in transitively -- confirm.

namespace graphlab {
  class oarchive;
  class iarchive;
}

namespace graphlab {
  namespace archive_detail {

    /** Serialization of null terminated const char* strings.
     * This is necessary to serialize constant strings like
     * \code
     * oarc << "hello world";
     * \endcode
     */
    template <typename OutArcType>
    struct serialize_impl<OutArcType, const char*, false> {
      static void exec(OutArcType& oarc, const char* const& s) {
        // save the length
        // ++ for the \0
        size_t length = strlen(s);
        length++;
        oarc << length;
        oarc.write(reinterpret_cast<const char*>(s), length);
        DASSERT_FALSE(oarc.fail());
      }
    };

    /// Serialization of fixed length char arrays
    template <typename OutArcType, size_t len>
    struct serialize_impl<OutArcType, char [len], false> {
      static void exec(OutArcType& oarc, const char s[len] ) {
        // the whole array is written, regardless of any embedded \0
        size_t length = len;
        oarc << length;
        oarc.write(reinterpret_cast<const char*>(s), length);
        DASSERT_FALSE(oarc.fail());
      }
    };

    /// Serialization of null terminated char* strings
    template <typename OutArcType>
    struct serialize_impl<OutArcType, char*, false> {
      static void exec(OutArcType& oarc, char* const& s) {
        // save the length
        // ++ for the \0
        size_t length = strlen(s);
        length++;
        oarc << length;
        oarc.write(reinterpret_cast<const char*>(s), length);
        DASSERT_FALSE(oarc.fail());
      }
    };

    /// Deserialization of null terminated char* strings
    template <typename InArcType>
    struct deserialize_impl<InArcType, char*, false> {
      static void exec(InArcType& iarc, char*& s) {
        // Save the length and check if lengths match
        size_t length;
        iarc >> length;
        // the caller receives ownership of the new[] buffer;
        // length already includes the trailing \0
        s = new char[length];
        //operator>> the rest
        iarc.read(reinterpret_cast<char*>(s), length);
        DASSERT_FALSE(iarc.fail());
      }
    };

    /// Deserialization of fixed length char arrays
    template <typename InArcType, size_t len>
    struct deserialize_impl<InArcType, char [len], false> {
      static void exec(InArcType& iarc, char s[len]) {
        size_t length;
        iarc >> length;
        // refuse to overflow the destination array
        ASSERT_LE(length, len);
        iarc.read(reinterpret_cast<char*>(s), length);
        DASSERT_FALSE(iarc.fail());
      }
    };

    /// Serialization of std::string
    template <typename OutArcType>
    struct serialize_impl<OutArcType, std::string, false> {
      static void exec(OutArcType& oarc, const std::string& s) {
        size_t length = s.length();
        oarc << length;
        oarc.write(reinterpret_cast<const char*>(s.c_str()),
                   (std::streamsize)length);
        DASSERT_FALSE(oarc.fail());
      }
    };

    /// Deserialization of std::string
    template <typename InArcType>
    struct deserialize_impl<InArcType, std::string, false> {
      static void exec(InArcType& iarc, std::string& s) {
        //read the length
        size_t length;
        iarc >> length;
        //resize the string and read the characters
        s.resize(length);
        iarc.read(const_cast<char*>(s.c_str()), (std::streamsize)length);
        DASSERT_FALSE(iarc.fail());
      }
    };

    /// Serialization of std::pair
    template <typename OutArcType, typename T, typename U>
    struct serialize_impl<OutArcType, std::pair<T, U>, false > {
      static void exec(OutArcType& oarc, const std::pair<T, U>& s) {
        oarc << s.first << s.second;
      }
    };

    /// Deserialization of std::pair
    template <typename InArcType, typename T, typename U>
    struct deserialize_impl<InArcType, std::pair<T, U>, false > {
      static void exec(InArcType& iarc, std::pair<T, U>& s) {
        iarc >> s.first >> s.second;
      }
    };

    /** Serialization of 8 byte wide integers.
     * A variable length encoding is used: a one byte tag (0..3) selects
     * whether the value is stored in 1, 2, 4 or 8 bytes. This compresses
     * common small values such as
     * \code
     * oarc << vec.length();
     * \endcode
     */
    template <typename OutArcType>
    struct serialize_impl<OutArcType, unsigned long , true> {
      static void exec(OutArcType& oarc, const unsigned long & s) {
        // only bottom 1 byte
        if ((s >> 8) == 0) {
          unsigned char c = 0;
          unsigned char trunc_s = s;
          oarc.direct_assign(c);
          oarc.direct_assign(trunc_s);
        }
        // only bottom 2 byte
        else if ((s >> 16) == 0) {
          unsigned char c = 1;
          unsigned short trunc_s = s;
          oarc.direct_assign(c);
          oarc.direct_assign(trunc_s);
        }
        // only bottom 4 byte
        // NOTE(review): the shift by 32 assumes unsigned long is 64 bits
        // wide; on a platform with 32-bit long this shift is undefined
        // behavior -- confirm target platforms.
        else if ((s >> 32) == 0) {
          unsigned char c = 2;
          uint32_t trunc_s = s;
          oarc.direct_assign(c);
          oarc.direct_assign(trunc_s);
        } else {
          unsigned char c = 3;
          oarc.direct_assign(c);
          oarc.direct_assign(s);
        }
      }
    };

    /// Deserialization of 8 byte wide integer
    template <typename InArcType>
    struct deserialize_impl<InArcType, unsigned long , true> {
      static void exec(InArcType& iarc, unsigned long & s) {
        // read the one byte tag, then the payload width it announces
        unsigned char c;
        iarc.read(reinterpret_cast<char*>(&c), 1);
        switch(c) {
        case 0: {
          unsigned char val;
          iarc.read(reinterpret_cast<char*>(&val), 1);
          s = val;
          break;
        }
        case 1: {
          unsigned short val;
          iarc.read(reinterpret_cast<char*>(&val), 2);
          s = val;
          break;
        }
        case 2: {
          uint32_t val;
          iarc.read(reinterpret_cast<char*>(&val), 4);
          s = val;
          break;
        }
        case 3: {
          iarc.read(reinterpret_cast<char*>(&s), 8);
          break;
        }
        default:
          // corrupt archive: the tag must be 0..3
          ASSERT_LE(c, 3);
        };
      }
    };

  } // namespace archive_detail
} // namespace graphlab

#undef INT_SERIALIZE
#endif

================================================
FILE: src/graphlab/serialization/conditional_serialize.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_SERIALIZATION_CONDITIONAL_SERIALIZE_HPP
#define GRAPHLAB_SERIALIZATION_CONDITIONAL_SERIALIZE_HPP

#include <graphlab/serialization/oarchive.hpp>
#include <graphlab/serialization/iarchive.hpp>

namespace graphlab {

  /**
   * Optional-like serialization wrapper: a bool flag is written first,
   * and the wrapped value follows only when the flag is set.
   */
  template <typename T>
  struct conditional_serialize {
    bool hasval;  // whether val holds a meaningful value
    T val;        // the payload; meaningful only when hasval is true

    conditional_serialize(): hasval(false) { }
    conditional_serialize(T& val): hasval(true), val(val) { }
    // NOTE(review): copies val even when cs.hasval is false; for scalar T
    // this reads a potentially uninitialized value -- confirm intended.
    conditional_serialize(const conditional_serialize& cs):
        hasval(cs.hasval), val(cs.val) { }
    conditional_serialize& operator=(const conditional_serialize& cs) {
      hasval = cs.hasval;
      val = cs.val;
      return (*this);
    }
    /// Writes hasval, then val only if hasval is set.
    void save(oarchive& oarc) const {
      oarc << hasval;
      if (hasval) oarc << val;
    }
    /// Reads hasval, then val only if hasval is set (val untouched otherwise).
    void load(iarchive& iarc) {
      iarc >> hasval;
      if (hasval) iarc >> val;
    }
  };
};

#endif

================================================
FILE: src/graphlab/serialization/has_load.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_HAS_LOAD_HPP
#define GRAPHLAB_HAS_LOAD_HPP

#include <typeinfo>

namespace graphlab {
  namespace archive_detail {

    /** SFINAE method to detect if a class T
     * implements a function void T::load(ArcType&)
     *
     * If T implements the method, has_load_method<ArcType,T>::value will be
     * true.
Otherwise it will be false */ template<typename ArcType, typename T> struct has_load_method { template<typename U, void (U::*)(ArcType&)> struct SFINAE {}; template<typename U> static char Test(SFINAE<U, &U::load>*); template<typename U> static int Test(...); static const bool value = sizeof(Test<T>(0)) == sizeof(char); }; /** * load_or_fail<ArcType, T>(arc, t) * will call this version of the function if * T implements void T::load(ArcType&). * * load_or_fail<ArcType, T>(arc, t) will therefore load the class successfully * if T implements the load function correctly. Otherwise, calling * load_or_fail will print an error message. */ template <typename ArcType, typename ValueType> typename boost::enable_if_c<has_load_method<ArcType, ValueType>::value, void>::type load_or_fail(ArcType& o, ValueType &t) { t.load(o); } /** * load_or_fail<ArcType, T>(arc, t) * will call this version of the function if * * load_or_fail<ArcType, T>(arc, t) will therefore load the class successfully * if T implements the load function correctly. Otherwise, calling * load_or_fail will print an error message. * T does not implement void T::load(ArcType&). */ template <typename ArcType, typename ValueType> typename boost::disable_if_c<has_load_method<ArcType, ValueType>::value, void>::type load_or_fail(ArcType& o, ValueType &t) { ASSERT_MSG(false, "Trying to deserializable type %s without valid load method.", typeid(ValueType).name()); } } // archive_detail } // graphlab #endif ================================================ FILE: src/graphlab/serialization/has_save.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef HAS_SAVE_HPP
#define HAS_SAVE_HPP

#include <typeinfo>

namespace graphlab {
  namespace archive_detail {

    /** SFINAE method to detect if a class T
     * implements a function void T::save(ArcType&) const
     *
     * If T implements the method, has_save_method<ArcType,T>::value will be
     * true. Otherwise it will be false
     */
    template<typename ArcType, typename T>
    struct has_save_method {
      // matches only when U has a member "void U::save(ArcType&) const"
      template<typename U, void (U::*)(ArcType&) const> struct SFINAE {};
      template<typename U> static char Test(SFINAE<U, &U::save>*);
      template<typename U> static int Test(...);
      static const bool value = sizeof(Test<T>(0)) == sizeof(char);
    };

    /**
     * save_or_fail<ArcType, T>(arc, t)
     * will call this version of the function if
     * T implements void T::save(ArcType&) const.
     *
     * save_or_fail<ArcType, T>(arc, t) will therefore save the class
     * successfully if T implements the save function correctly. Otherwise,
     * calling save_or_fail will print an error message.
     */
    template <typename ArcType, typename ValueType>
    typename boost::enable_if_c<has_save_method<ArcType, ValueType>::value, void>::type
    save_or_fail(ArcType& o, const ValueType &t) {
      t.save(o);
    }

    /**
     * save_or_fail<ArcType, T>(arc, t) will print an error message at
     * runtime instead of saving. This version of the function is selected
     * when
     * T does not implement void T::save(ArcType&) const.
*/ template <typename ArcType, typename ValueType> typename boost::disable_if_c<has_save_method<ArcType, ValueType>::value, void>::type save_or_fail(ArcType& o, const ValueType &t) { ASSERT_MSG(false,"Trying to serializable type %s without valid save method.", typeid(ValueType).name()); } } // archive_detail } // graphlab #endif ================================================ FILE: src/graphlab/serialization/iarchive.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SERIALIZE_HPP #include <graphlab/serialization/serialize.hpp> #else #ifndef GRAPHLAB_IARCHIVE_HPP #define GRAPHLAB_IARCHIVE_HPP #include <iostream> #include <graphlab/logger/assertions.hpp> #include <graphlab/serialization/is_pod.hpp> #include <graphlab/serialization/has_load.hpp> namespace graphlab { /** * \ingroup group_serialization * \brief The serialization input archive object which, provided * with a reference to an istream, will read from the istream, * providing deserialization capabilities. 
 *
 * Given a source of serialized bytes (written by a graphlab::oarchive),
 * in the form of a standard input stream, you can construct an iarchive
 * object by:
 * \code
 * // where strm is an istream object
 * graphlab::iarchive iarc(strm);
 * \endcode
 *
 * For instance, to deserialize from a file,
 * \code
 * std::ifstream fin("inputfile.bin");
 * graphlab::iarchive iarc(fin);
 * \endcode
 *
 * Once the iarc object is constructed, \ref sec_serializable
 * objects can be read from it using the >> stream operator.
 *
 * \code
 * iarc >> a >> b >> c;
 * \endcode
 *
 * Alternatively, data can be directly read from the stream using
 * the iarchive::read() and iarchive::read_char() functions.
 *
 * For more usage details, see \ref serialization
 *
 * The iarchive object should not be used once the associated stream
 * object is closed or is destroyed.
 *
 * To use this class, include
 * graphlab/serialization/serialization_includes.hpp
 */
class iarchive {
 public:
  // exactly one of the two sources is active:
  std::istream* in;  // stream source (NULL in buffer mode)
  const char* buf;   // in-memory source (NULL in stream mode)
  size_t off;        // current read offset into buf
  size_t len;        // total length of buf

  /// Directly reads a single character from the input stream
  inline char read_char() {
    char c;
    if (buf) {
      c = buf[off];
      ++off;
    } else {
      in->get(c);
    }
    return c;
  }

  /**
   * Directly reads a sequence of "len" bytes from the
   * input stream into the location pointed to by "c"
   */
  inline void read(char* c, size_t l) {
    if (buf) {
      // NOTE(review): no bounds check here; overruns are only detected
      // afterwards through fail(). memcpy relies on <cstring> being
      // available transitively -- confirm.
      memcpy(c, buf + off, l);
      off += l;
    } else {
      in->read(c, l);
    }
  }

  /// Returns true if the underlying stream is in a failure state
  inline bool fail() {
    // buffer mode fails once the offset has run past the end
    return in == NULL ? off > len : in->fail();
  }

  /**
   * Constructs an iarchive object.
   * Takes a reference to a generic std::istream object and associates
   * the archive with it. Reads from the archive will read from the
   * associated input stream.
   */
  inline iarchive(std::istream& instream)
      : in(&instream), buf(NULL), off(0), len(0) { }

  /// Constructs an iarchive reading from an in-memory buffer of len bytes.
  inline iarchive(const char* buf, size_t len)
      : in(NULL), buf(buf), off(0), len(len) { }

  ~iarchive() {}
};


/**
 * \ingroup group_serialization
 * \brief
 * When this archive is used to deserialize an object,
 * and the object does not support serialization,
 * failure will only occur at runtime. Otherwise equivalent to
 * graphlab::iarchive.
 */
class iarchive_soft_fail{
 public:
  iarchive *iarc;  // underlying archive that performs the actual reads
  bool mine;       // true when this object owns (and must delete) iarc

  /// Directly reads a single character from the input stream
  inline char read_char() { return iarc->read_char(); }

  /**
   * Directly reads a sequence of "len" bytes from the
   * input stream into the location pointed to by "c"
   */
  inline void read(char* c, size_t len) { iarc->read(c, len); }

  /// Returns true if the underlying stream is in a failure state
  inline bool fail() { return iarc->fail(); }

  /**
   * Constructs an iarchive_soft_fail object.
   * Takes a reference to a generic std::istream object and associates
   * the archive with it. Reads from the archive will read from the
   * associated input stream.
   */
  inline iarchive_soft_fail(std::istream &instream)
      : iarc(new iarchive(instream)), mine(true) {}

  /**
   * Constructs an iarchive_soft_fail object from an iarchive.
   * Both will share the same input stream
   */
  inline iarchive_soft_fail(iarchive &iarc)
      : iarc(&iarc), mine(false) {}

  inline ~iarchive_soft_fail() { if (mine) delete iarc; }
};


namespace archive_detail {

  /// Called by the regular archive. The regular archive will do a hard fail.
  template <typename InArcType, typename T>
  struct deserialize_hard_or_soft_fail {
    inline static void exec(InArcType& iarc, T& t) {
      t.load(iarc);
    }
  };

  /// Called by the soft fail archive.
  template <typename T>
  struct deserialize_hard_or_soft_fail<iarchive_soft_fail, T> {
    inline static void exec(iarchive_soft_fail& iarc, T& t) {
      load_or_fail(*(iarc.iarc), t);
    }
  };

  /** Implementation of the deserializer for different types.
      This is the catch-all.
      If it gets here, it must be a non-POD and is a class.
      We therefore call the .load function.
      Here we pick between the archive types using
      deserialize_hard_or_soft_fail */
  template <typename InArcType, typename T, bool IsPOD>
  struct deserialize_impl {
    inline static void exec(InArcType& iarc, T& t) {
      deserialize_hard_or_soft_fail<InArcType, T>::exec(iarc, t);
    }
  };

  // catch if type is a POD: read it back byte for byte
  template <typename InArcType, typename T>
  struct deserialize_impl<InArcType, T, true>{
    inline static void exec(InArcType& iarc, T &t) {
      iarc.read(reinterpret_cast<char*>(&t), sizeof(T));
    }
  };

} //namespace archive_detail

/// \cond GRAPHLAB_INTERNAL

/** Allows Use of the "stream" syntax for serialization */
template <typename T>
inline iarchive& operator>>(iarchive& iarc, T &t) {
  archive_detail::deserialize_impl<iarchive, T, gl_is_pod<T>::value >::exec(iarc, t);
  return iarc;
}

/** Allows Use of the "stream" syntax for serialization */
template <typename T>
inline iarchive_soft_fail& operator>>(iarchive_soft_fail& iarc, T &t) {
  archive_detail::deserialize_impl<iarchive_soft_fail, T, gl_is_pod<T>::value >::exec(iarc, t);
  return iarc;
}

/** deserializes an arbitrary pointer + length from an archive */
inline iarchive& deserialize(iarchive& iarc,
                             void* str,
                             const size_t length) {
  iarc.read(reinterpret_cast<char*>(str), (std::streamsize)length);
  assert(!iarc.fail());
  return iarc;
}

/** deserializes an arbitrary pointer + length from an archive */
inline iarchive_soft_fail& deserialize(iarchive_soft_fail& iarc,
                                       void* str,
                                       const size_t length) {
  iarc.read(reinterpret_cast<char*>(str), (std::streamsize)length);
  assert(!iarc.fail());
  return iarc;
}

/// \endcond GRAPHLAB_INTERNAL

/**
  \ingroup group_serialization
  \brief Macro to make it easy to define out-of-place loads

  In the event that it is impractical to implement a save() and load()
  function in the class one wants to serialize, it is necessary to define
  an "out of place" save and load.
  See \ref sec_serializable_out_of_place for an example

  \note important! this must be defined in the global namespace!
*/
#define BEGIN_OUT_OF_PLACE_LOAD(arc, tname, tval)              \
  namespace graphlab{ namespace archive_detail {               \
  template <typename InArcType>                                \
  struct deserialize_impl<InArcType, tname, false>{            \
  static void exec(InArcType& arc, tname & tval) {

/// Closes the body opened by BEGIN_OUT_OF_PLACE_LOAD.
#define END_OUT_OF_PLACE_LOAD() } }; } }

} // namespace graphlab

#endif
#endif

================================================
FILE: src/graphlab/serialization/is_pod.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_IS_POD_HPP
#define GRAPHLAB_IS_POD_HPP

#include <boost/type_traits.hpp>

namespace graphlab {

  /**
    \ingroup group_serialization
    \brief Inheriting from this type will force the serializer
    to treat the derived type as a POD type.
  */
  struct IS_POD_TYPE { };

  /**
   * \ingroup group_serialization
   *
   * \brief Tests if T is a POD type
   *
   * gl_is_pod<T>::value is true if T is a POD type (as determined by
   * boost::is_pod) or if T inherits from IS_POD_TYPE. gl_is_pod<T>::value
   * is false otherwise.
   */
  template <typename T>
  struct gl_is_pod{
    // it is a pod and is not an integer since we have special handlings
    // for integers
    BOOST_STATIC_CONSTANT(bool, value =
                          (boost::type_traits::ice_or<
                             boost::is_scalar<T>::value,
                             boost::is_base_of<IS_POD_TYPE, T>::value
                           >::value));

    // standard POD detection is no good because things which contain
    // pointers are POD, but are not serializable
    // (T is POD and T is not an integer of size >= 2)
    /*BOOST_STATIC_CONSTANT(bool, value =
                          (boost::type_traits::ice_and<
                             boost::is_pod<T>::value,
                             boost::type_traits::ice_not<
                               boost::type_traits::ice_and<
                                 boost::is_integral<T>::value,
                                 sizeof(T) >= 2
                               >::value
                             >::value
                           >::value ));*/
  };

  /// \internal
  // NOTE(review): "scaler" is a typo for "scalar", but the identifier is
  // public API and must stay spelled this way for source compatibility.
  template <typename T>
  struct gl_is_pod_or_scaler{
    BOOST_STATIC_CONSTANT(bool, value =
                          (boost::type_traits::ice_or<
                             boost::is_scalar<T>::value,
                             gl_is_pod<T>::value>::value ));
  };

}
#endif

================================================
FILE: src/graphlab/serialization/iterator.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_SERIALIZE_ITERATOR_HPP
#define GRAPHLAB_SERIALIZE_ITERATOR_HPP

#include <iterator>

#include <graphlab/serialization/oarchive.hpp>
#include <graphlab/serialization/iarchive.hpp>

namespace graphlab {

  /**
    \ingroup group_serialization
    \brief Serializes the contents between the iterators begin and end.

    This function prefers random access iterators since it needs
    a distance between the begin and end iterator.
    This function as implemented will work for other input iterators
    but is extremely inefficient.

    \tparam OutArcType The output archive type. This should not need to be
                       specified. The compiler will typically infer this
                       correctly.
    \tparam RandomAccessIterator The iterator type. This should not need to
                                 be specified. The compiler will typically
                                 infer this correctly.

    \param oarc A reference to the output archive to write to.
    \param begin The start of the iterator range to write.
    \param end The end of the iterator range to write.
  */
  template <typename OutArcType, typename RandomAccessIterator>
  void serialize_iterator(OutArcType& oarc, RandomAccessIterator begin,
                          RandomAccessIterator end){
    // the element count is written first so the deserializer knows
    // how many elements to read back
    const size_t vsize = std::distance(begin, end);
    oarc << vsize;
    // store each element
    for(; begin != end; ++begin) oarc << *begin;
  }

  /**
    \ingroup group_serialization
    \brief Serializes the contents between the iterators begin and end.

    This functions takes all iterator types, but takes a "count" for
    efficiency. This count is checked and will return failure if the
    number of elements serialized does not match the count

    \tparam OutArcType The output archive type. This should not need to be
                       specified. The compiler will typically infer this
                       correctly.
    \tparam InputIterator The iterator type. This should not need to be
                          specified. The compiler will typically infer this
                          correctly.

    \param oarc A reference to the output archive to write to.
    \param begin The start of the iterator range to write.
    \param end The end of the iterator range to write.
    \param vsize The distance between the iterators begin and end.
                 Must match std::distance(begin, end);
  */
  template <typename OutArcType, typename InputIterator>
  void serialize_iterator(OutArcType& oarc, InputIterator begin,
                          InputIterator end, size_t vsize){
    oarc << vsize;
    //store each element
    size_t count = 0;
    for(; begin != end; ++begin) {
      oarc << *begin;
      ++count;
    }
    // fail if count does not match
    ASSERT_EQ(count, vsize);
  }

  /**
    \ingroup group_serialization
    \brief The accompanying function to serialize_iterator()
    Reads elements from the stream and writes it to the output iterator.

    Note that this requires an additional template parameter T which is
    the "type of object to deserialize"
    This is necessary for instance for the map type. The
    <code>map<T,U>::value_type</code> is <code>pair<const T,U></code>
    which is not useful since I cannot assign to it.
    In this case, <code>T=pair<T,U></code>

    \tparam InArcType The input archive type.
    \tparam T The type of values to deserialize
    \tparam OutputIterator The type of the output iterator to be written
                           to. This should not need to be specified. The
                           compiler will typically infer this correctly.

    \param iarc A reference to the input archive
    \param result The output iterator to write to
  */
  template <typename InArcType, typename T, typename OutputIterator>
  void deserialize_iterator(InArcType& iarc, OutputIterator result) {
    // get the number of elements to deserialize
    size_t length = 0;
    iarc >> length;
    // iterate through and send to the output iterator
    for (size_t x = 0; x < length ; ++x){
      /**
       * A compiler error on this line means that one of the user
       * defined types currently trying to be serialized (e.g.,
       * vertex_data, edge_data, messages, gather_type, or
       * vertex_programs) does not have a default constructor.
*/ T v; iarc >> v; (*result) = v; result++; } } } #endif ================================================ FILE: src/graphlab/serialization/list.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SERIALIZE_LIST_HPP #define GRAPHLAB_SERIALIZE_LIST_HPP #include <list> #include <graphlab/serialization/iarchive.hpp> #include <graphlab/serialization/oarchive.hpp> #include <graphlab/serialization/iterator.hpp> namespace graphlab { namespace archive_detail { /** serializes a list */ template <typename OutArcType, typename T> struct serialize_impl<OutArcType, std::list<T>, false > { static void exec(OutArcType& oarc, const std::list<T>& vec){ serialize_iterator(oarc,vec.begin(),vec.end(), vec.size()); } }; /** deserializes a list */ template <typename InArcType, typename T> struct deserialize_impl<InArcType, std::list<T>, false > { static void exec(InArcType& iarc, std::list<T>& vec){ vec.clear(); deserialize_iterator<T>(iarc, std::inserter(vec,vec.end())); } }; } // archive_detail } // graphlab #endif ================================================ FILE: src/graphlab/serialization/map.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 * For more about this software visit:
 *
 * http://www.graphlab.ml.cmu.edu
 *
 */

#ifndef GRAPHLAB_SERIALIZE_MAP_HPP
#define GRAPHLAB_SERIALIZE_MAP_HPP

#include <map>

#include <graphlab/serialization/iarchive.hpp>
#include <graphlab/serialization/oarchive.hpp>
#include <graphlab/serialization/iterator.hpp>

namespace graphlab {
  namespace archive_detail {

    /** Serializes a map */
    template <typename OutArcType, typename T, typename U>
    struct serialize_impl<OutArcType, std::map<T,U>, false > {
      static void exec(OutArcType& oarc, const std::map<T,U>& vec){
        serialize_iterator(oarc, vec.begin(), vec.end(), vec.size());
      }
    };

    /** deserializes a map */
    template <typename InArcType, typename T, typename U>
    struct deserialize_impl<InArcType, std::map<T,U>, false > {
      static void exec(InArcType& iarc, std::map<T,U>& vec){
        vec.clear();
        // elements are read back as pair<T,U> rather than value_type
        // (pair<const T,U>), since the latter cannot be assigned to
        deserialize_iterator<InArcType, std::pair<T,U> >(iarc,
                                     std::inserter(vec,vec.end()));
      }
    };

  } // archive_detail
} // graphlab
#endif

================================================
FILE: src/graphlab/serialization/oarchive.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ // This file should not be included directly. use serialize.hpp #ifndef GRAPHLAB_SERIALIZE_HPP #include <graphlab/serialization/serialize.hpp> #else #ifndef GRAPHLAB_OARCHIVE_HPP #define GRAPHLAB_OARCHIVE_HPP #include <iostream> #include <string> #include <graphlab/logger/assertions.hpp> #include <graphlab/serialization/is_pod.hpp> #include <graphlab/serialization/has_save.hpp> #include <graphlab/util/branch_hints.hpp> namespace graphlab { /** * \ingroup group_serialization * \brief The serialization output archive object which, provided * with a reference to an ostream, will write to the ostream, * providing serialization capabilities. * * Given a standard output stream, you can construct an oarchive * object by: * \code * // where strm is an ostream object * graphlab::oarchive oarc(strm); * \endcode * * For instance, to serialize to a file, * \code * std::ofstream fout("outputfile.bin"); * graphlab::oarchive oarc(fout); * \endcode * * Once the oarc object is constructed, \ref sec_serializable objects can be * written to it using the << stream operator. * * \code * oarc << a << b << c; * \endcode * * Alternatively, data can be directly written to the stream * using the oarchive::write() function. * * Written data can be deserialized using graphlab::iarchive. * For more usage details, see \ref serialization * * The oarchive object should not be used once the associated stream * object is closed or is destroyed. 
* * The oarc object * does <b> not </b> flush the associated stream, and the user may need to * manually flush the associated stream to clear any stream buffers. * For instance, while the std::stringstream may be used for both output * and input, it is necessary to flush the stream before all bytes written to * the stringstream are available for input. * * To use this class, include * graphlab/serialization/serialization_includes.hpp */ class oarchive{ public: std::ostream* out; char* buf; size_t off; size_t len; /// constructor. Takes a generic std::ostream object inline oarchive(std::ostream& outstream) : out(&outstream),buf(NULL),off(0),len(0) {} inline oarchive(void) : out(NULL),buf(NULL),off(0),len(0) {} inline void expand_buf(size_t s) { if (__unlikely__(off + s > len)) { len = 2 * (s + len); buf = (char*)realloc(buf, len); } } /** Directly writes "s" bytes from the memory location * pointed to by "c" into the stream. */ inline void write(const char* c, std::streamsize s) { if (out == NULL) { expand_buf(s); memcpy(buf + off, c, s); off += s; } else { out->write(c, s); } } template <typename T> inline void direct_assign(const T& t) { if (out == NULL) { expand_buf(sizeof(T)); (*reinterpret_cast<T*>(buf + off)) = t; off += sizeof(T); } else { out->write(reinterpret_cast<const char*>(&t), sizeof(T)); } } inline void advance(size_t s) { if (out == NULL) { expand_buf(s); off += s; } else { out->seekp(s, std::ios_base::cur); } } /// Returns true if the underlying stream is in a failure state inline bool fail() { return out == NULL ? false : out->fail(); } inline ~oarchive() { } }; /** * \ingroup group_serialization * \brief * When this archive is used to serialize an object, * and the object does not support serialization, * failure will only occur at runtime. Otherwise equivalent to * graphlab::oarchive */ class oarchive_soft_fail { public: oarchive* oarc; bool mine; /// constructor. 
Takes a generic std::ostream object inline oarchive_soft_fail(std::ostream& outstream) : oarc(new oarchive(outstream)), mine(true) { } inline oarchive_soft_fail(oarchive& oarc):oarc(&oarc), mine(false) { } inline oarchive_soft_fail(void) : oarc(new oarchive) {} /** Directly writes "s" bytes from the memory location * pointed to by "c" into the stream. */ inline void write(const char* c, std::streamsize s) { oarc->write(c, s); } template <typename T> inline void direct_assign(const T& t) { oarc->direct_assign(t); } inline bool fail() { return oarc->fail(); } inline ~oarchive_soft_fail() { if (mine) delete oarc; } }; namespace archive_detail { /// called by the regular archive The regular archive will do a hard fail template <typename OutArcType, typename T> struct serialize_hard_or_soft_fail { inline static void exec(OutArcType& oarc, const T& t) { t.save(oarc); } }; /// called by the soft fail archive template <typename T> struct serialize_hard_or_soft_fail<oarchive_soft_fail, T> { inline static void exec(oarchive_soft_fail& oarc, const T& t) { // create a regular oarchive and // use the save_or_fail function which will // perform a soft fail save_or_fail(*(oarc.oarc), t); } }; /** Implementation of the serializer for different types. This is the catch-all. If it gets here, it must be a non-POD and is a class. We therefore call the .save function. 
Here we pick between the archive types using serialize_hard_or_soft_fail */ template <typename OutArcType, typename T, bool IsPOD> struct serialize_impl { static void exec(OutArcType& oarc, const T& t) { serialize_hard_or_soft_fail<OutArcType, T>::exec(oarc, t); } }; /** Catch if type is a POD */ template <typename OutArcType, typename T> struct serialize_impl<OutArcType, T, true> { inline static void exec(OutArcType& oarc, const T& t) { oarc.direct_assign(t); //oarc.write(reinterpret_cast<const char*>(&t), sizeof(T)); } }; /** Re-dispatch if for some reasons T already has a const */ template <typename OutArcType, typename T> struct serialize_impl<OutArcType, const T, true> { inline static void exec(OutArcType& oarc, const T& t) { serialize_impl<OutArcType, T, true>::exec(oarc, t); } }; /** Re-dispatch if for some reasons T already has a const */ template <typename OutArcType, typename T> struct serialize_impl<OutArcType, const T, false> { inline static void exec(OutArcType& oarc, const T& t) { serialize_impl<OutArcType, T, false>::exec(oarc, t); } }; }// archive_detail /// \cond GRAPHLAB_INTERNAL /** Overloads the operator<< in the oarchive to allow the use of the stream syntax for serialization. It simply re-dispatches into the serialize_impl classes */ template <typename T> inline oarchive& operator<<(oarchive& oarc, const T& t) { archive_detail::serialize_impl<oarchive, T, gl_is_pod<T>::value >::exec(oarc, t); return oarc; } /** Overloads the operator<< in the oarchive_soft_fail to allow the use of the stream syntax for serialization. 
It simply re-dispatches into the serialize_impl classes */ template <typename T> inline oarchive_soft_fail& operator<<(oarchive_soft_fail& oarc, const T& t) { archive_detail::serialize_impl<oarchive_soft_fail, T, gl_is_pod<T>::value >::exec(oarc, t); return oarc; } /** Serializes an arbitrary pointer + length to an archive */ inline oarchive& serialize(oarchive& oarc, const void* str, const size_t length) { // save the length oarc.write(reinterpret_cast<const char*>(str), (std::streamsize)length); assert(!oarc.fail()); return oarc; } /** Serializes an arbitrary pointer + length to an archive */ inline oarchive_soft_fail& serialize(oarchive_soft_fail& oarc, const void* str, const size_t length) { // save the length oarc.write(reinterpret_cast<const char*>(str), (std::streamsize)length); assert(!oarc.fail()); return oarc; } /// \endcond GRAPHLAB_INTERNAL } /** \ingroup group_serialization \brief Macro to make it easy to define out-of-place saves In the event that it is impractical to implement a save() and load() function in the class one wnats to serialize, it is necessary to define an "out of save" save and load. See \ref sec_serializable_out_of_place for an example \note important! this must be defined in the global namespace! */ #define BEGIN_OUT_OF_PLACE_SAVE(arc, tname, tval) \ namespace graphlab{ namespace archive_detail { \ template <typename OutArcType> struct serialize_impl<OutArcType, tname, false> { \ static void exec(OutArcType& arc, const tname & tval) { #define END_OUT_OF_PLACE_SAVE() } }; } } #endif #endif ================================================ FILE: src/graphlab/serialization/serializable_concept.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SERIALIZABLE #define GRAPHLAB_SERIALIZABLE #include <boost/concept/assert.hpp> #include <boost/concept/requires.hpp> #include <boost/concept_check.hpp> #include <sstream> #include <graphlab/serialization/serialize.hpp> namespace graphlab { /** * \brief Concept checks if a type T is serializable. * * This is a concept checking class for boost::concept and can be * used to enforce that a type T is \ref sec_serializable, assignable and * default constructible. * * \tparam T The type to test for serializability. */ template <typename T> class Serializable : boost::Assignable<T>, boost::DefaultConstructible<T> { public: BOOST_CONCEPT_USAGE(Serializable) { std::stringstream strm; oarchive oarc(strm); iarchive iarc(strm); const T const_t = T(); T t = T(); // A compiler error on these lines implies that your type is not // serializable. See the documentaiton on how to make // serializable type. oarc << const_t; iarc >> t; } }; } // namespace graphlab #endif ================================================ FILE: src/graphlab/serialization/serializable_pod.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef SERIALIZABLE_POD_HPP #define SERIALIZABLE_POD_HPP #include <graphlab/serialization/is_pod.hpp> #define SERIALIZABLE_POD(tname) \ namespace graphlab { \ template <> \ struct gl_is_pod<tname> { \ BOOST_STATIC_CONSTANT(bool, value = true); \ }; \ } #endif ================================================ FILE: src/graphlab/serialization/serialization.dox ================================================ /** \page serialization Serialization We have a custom serialization scheme which is designed for performance rather than compatibility. It does not perform type checking, It does not perform pointer tracking, and has only limited support across platforms. It has been tested, and should be compatible across x86 platforms. For a summary of all serialization functionality see \ref group_serialization There are two serialization classes \ref graphlab::oarchive and \ref graphlab::iarchive. The former does output, while the latter does input. To include all serialization headers, \#include <graphlab/serialization/serialization_includes.hpp>. \section sec_basic_serialize Basic serialize/deserialize To serialize data to disk, you just create an output archive, and associate it wiith an output stream. For instance, to serialize to a file called "file.bin": \code std::ofstream fout("file.bin", std::fstream::binary); graphlab::oarchive oarc(fout); \endcode The << stream operators are then used to write data into the archive. 
\code
int i = 10;
double j = 20;
std::vector<float> v(10,1.0); // create a vector of 10 "1.0" values

oarc << i << j << v;
\endcode

To read back, you use the iarchive with an input stream, and read back the
variables in the same order:

\code
std::ifstream fin("file.bin", std::fstream::binary);
graphlab::iarchive iarc(fin);

int i;
double j;
std::vector<float> v;

iarc >> i >> j >> v;
\endcode

\section sec_serializable Serializable

So what type of data is serializable?

\subsection sec_serializable_integers Integer Types

All integer datatypes are serializable.
\li <code> bool </code>
\li <code> char </code> and <code> unsigned char </code>
\li <code> short </code> and <code> unsigned short</code>
\li <code> int </code> and <code> unsigned int</code>
\li <code> long </code> and <code> unsigned long </code>
\li <code> long long </code> and <code> unsigned long long</code>

Since all fixed width integer types from stdint (int16_t, int32_t, etc) are
derived from these basic types, all fixed width integer types are also
serializable.
\li <code> int16_t </code> and <code> uint16_t </code>
\li <code> int32_t </code> and <code> uint32_t </code>
\li <code> int64_t </code> and <code> uint64_t </code>

All integer types are saved in their raw binary form without any additional
re-encoding. It is therefore important to deserialize with the same integer
width as what was serialized. The following code will fail in dramatic ways:

\code
int i;
oarc << i; // write some integer to a file

... // some time later we need to read back the integer.

long j;
iarc >> j; // this will fail
\endcode

\subsection sec_serializable_floats Floating Point Types

All floating point data types are serializable.
\li <code>float</code>
\li <code>double</code>
\li <code>long double</code> if your compiler supports quad precision.

Similar to integer types, all floating types are saved in raw binary form
without re-encoding. You must deserialize with the same floating point width
as what was serialized. (i.e.
if you serialize a double, you <b>must</b> deserialize a double. \subsection sec_serializable_containers Containers The following template containers are serializable as long as the contained types are all serializable. This can be recursively applied. \li <code>std::vector</code> \li <code>std::list</code> \li <code>std::set</code> \li <code>std::map</code> \li <code>boost::unordered_set</code> \li <code>boost::unordered_map</code> For instance, a <code>std::vector<int></code> is serializable. A <code>std::list<std::vector<int> > </code> is therefore also serializable. There is special handling for the std::vector<T> for performance in the event that T is a simple POD (Plain Old Data) data type. POD types are data types which occupy a contiguous region in memory. For instance, basic types (double, int, etc), or structs which contains only basic types. Such types can be copied or replicated using a simple mem-copy operation and can be greatly acceleration during serialization / deserialization. All basic data types are automatically POD types. We will discuss structs and other user types in the next section. \section sec_serializable_user User Structs and Classes To serialize a struct/class, all you need to do is to define a public load/save function. For instance: \code class TestClass{ public: int i, j; std::vector<int> k; void save(graphlab::oarchive& oarc) const { oarc << i << j << k; } void load(graphlab::iarchive& iarc) { iarc >> i >> j >> k; } }; \endcode The save() and load() function prototypes must match <b>exactly</b>. 
Other conditions are that the class must be Default Constructible:

\code
// it must be possible to create a variable of TestClass type like this
TestClass a;
\endcode

And that the class must be Assignable:

\code
TestClass a, b;
// it must be possible to assign one variable of TestClass to another
b = a;
\endcode

After which, <code>TestClass</code> becomes serializable, and can be stored
and read from an archive:

\code
TestClass t;
// set values to t

oarc << t; // write it to a file

... some time afterwards ...

TestClass t2;
iarc >> t2; // read it back from the file
\endcode

Since <code>TestClass</code> is now serializable, containers of TestClass
listed in \ref sec_serializable_containers are also serializable.

\section sec_serializable_pod POD Serialization

As mentioned in \ref sec_serializable_containers, POD data types occupy a
contiguous region in memory and hence can be serialized and deserialized
very quickly. Ideally, determination of whether a data type is POD or not
should be handled by the compiler. However, this capability is only
available in C++11 and not all compilers support it yet. We therefore
implemented a simple workaround which will allow you to identify to the
serializer that a class is POD, and avoid writing a save/load function.

We consider the following Coordinate struct.

\code
struct Coordinate{
  int x, y, z;
};
\endcode

This struct can be defined to be a POD type using an accelerated serializer
by simply inheriting from graphlab::IS_POD_TYPE

\code
struct Coordinate: public graphlab::IS_POD_TYPE{
  int x, y, z;
};
\endcode

Now, Coordinate variables, or even vector<Coordinate> variables will
serialize/deserialize faster. Also, you avoid writing a save() and load()
function.

\note Currently POD detection is performed through the boost type traits
library. When compilers implement std::is_pod (in C++11), POD detection
will improve, increasing the scope of types which can be serialized quickly
and automatically.
A minor concern is that the scope of POD types is still slightly too large:
technically pointer types are POD, but they cannot be serialized
automatically.

\section sec_serializable_out_of_place Out of Place Serialization

In some situations, you may find that you need to make a data type
serializable, but the data type is implemented by someone else, in a
different library, making it impossible to extend and write a member save()
and load() function as described in \ref sec_serializable_user.

In this situation, it is necessary to implement an "Out of place"
serializer. This is unfortunately somewhat more complicated.

For instance, suppose there is an external type, implemented by some other
library, called Matrix which we would like to make serializable. The
following code will have to be written in the <b>global namespace</b>

\code
BEGIN_OUT_OF_PLACE_SAVE(oarc, Matrix, mat)
  // write the "mat" variable which is of the type Matrix
  // into the output archive oarc
END_OUT_OF_PLACE_SAVE()

BEGIN_OUT_OF_PLACE_LOAD(iarc, Matrix, mat)
  // read the "mat" variable which is of the type Matrix
  // from the input archive iarc
END_OUT_OF_PLACE_LOAD()
\endcode

To facilitate reading and writing of data from the archives, the output
oarchive object provides an \ref graphlab::oarchive::write()
oarchive::write() function which directly writes a sequence of bytes to the
stream. Similarly, the input iarchive object provides a \ref
graphlab::iarchive::read() iarchive::read() function which directly reads a
sequence of bytes from the stream.

For instance, if the Matrix type example above is defined in the following
way:

\code
struct Matrix {
  int width;     // width of the matrix
  int height;    // height of the matrix
  double* data;  // an array containing all the values in the matrix
  int datalen;   // the number of elements in the "data" array.
} \endcode An "out of place" serializer could be implemented the following way: \code BEGIN_OUT_OF_PLACE_SAVE(oarc, Matrix, mat) // store the dimensions of the matrix oarc << mat.width << mat.height; // store the length of the data array oarc << mat.datalen; // write the double array oarc.write((char*)(mat.data), sizeof(double) * mat.datalen); END_OUT_OF_PLACE_SAVE() BEGIN_OUT_OF_PLACE_LOAD(iarc, Matrix, mat) // clear the matrix data if there is any if (mat.data != NULL) delete [] mat.data; // read the dimensions of the matrix iarc >> mat.width >> mat.height; // read the length of the data array iarc >> mat.datalen; // allocate sufficient storage for the array mat.data = new double[mat.datalen]; // read the double array iarc.read((char*)(mat.data), sizeof(double) * mat.datalen); END_OUT_OF_PLACE_LOAD() \endcode */ ================================================ FILE: src/graphlab/serialization/serialization_includes.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include <graphlab/serialization/serialize.hpp> #include <graphlab/serialization/serializable_concept.hpp> ================================================ FILE: src/graphlab/serialization/serialize.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. 
* All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SERIALIZE_HPP #define GRAPHLAB_SERIALIZE_HPP #include <graphlab/serialization/iarchive.hpp> #include <graphlab/serialization/oarchive.hpp> #include <graphlab/serialization/basic_types.hpp> #include <graphlab/serialization/list.hpp> #include <graphlab/serialization/set.hpp> #include <graphlab/serialization/vector.hpp> #include <graphlab/serialization/map.hpp> #include <graphlab/serialization/unordered_map.hpp> #include <graphlab/serialization/unordered_set.hpp> #include <graphlab/serialization/serializable_pod.hpp> #include <graphlab/serialization/unsupported_serialize.hpp> #include <graphlab/serialization/serialize_to_from_string.hpp> #include <graphlab/serialization/conditional_serialize.hpp> #endif ================================================ FILE: src/graphlab/serialization/serialize_to_from_string.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef SERIALIZE_TO_FROM_STRING_HPP #define SERIALIZE_TO_FROM_STRING_HPP #include <sstream> #include <boost/iostreams/stream.hpp> namespace graphlab { /** * \ingroup group_serialization * \brief Serializes a object to a string * * Converts a \ref serializable object t to a string * using the serializer. * * \tparam T the type of object to serialize. Typically * will be inferred by the compiler. * * \param t The object to serializer * \returns A string containing a serialized form of t * * \see deserialize_from_string() */ template <typename T> inline std::string serialize_to_string(const T &t) { std::stringstream strm; oarchive oarc(strm); oarc << t; strm.flush(); return strm.str(); } /** * \ingroup group_serialization * \brief Deserializes a object from a string * * Deserializes a \ref serializable object t from a string * using the deserializer. * * \tparam T the type of object to deserialize. Typically * will be inferred by the compiler. 
* * \param s The string to deserialize * \param t A reference to the object which will contain * the deserialized object when the function returns * * \see serialize_from_string() */ template <typename T> inline void deserialize_from_string(const std::string &s, T &t) { boost::iostreams::stream<boost::iostreams::array_source> istrm(s.c_str(), s.length()); iarchive iarc(istrm); iarc >> t; } } #endif ================================================ FILE: src/graphlab/serialization/set.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SERIALIZE_SET_HPP #define GRAPHLAB_SERIALIZE_SET_HPP #include <set> #include <graphlab/serialization/iarchive.hpp> #include <graphlab/serialization/oarchive.hpp> #include <graphlab/serialization/iterator.hpp> namespace graphlab { namespace archive_detail { /** serializes a set */ template <typename OutArcType, typename T> struct serialize_impl<OutArcType, std::set<T>, false > { static void exec(OutArcType& oarc, const std::set<T>& vec){ serialize_iterator(oarc, vec.begin(), vec.end(), vec.size()); } }; /** deserializes a set */ template <typename InArcType, typename T> struct deserialize_impl<InArcType, std::set<T>, false > { static void exec(InArcType& iarc, std::set<T>& vec){ vec.clear(); deserialize_iterator<InArcType, T>(iarc, std::inserter(vec,vec.end())); } }; } // archive_detail } // graphlab #endif ================================================ FILE: src/graphlab/serialization/unordered_map.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SERIALIZE_UNORDERED_MAP_HPP #define GRAPHLAB_SERIALIZE_UNORDERED_MAP_HPP #include <boost/unordered_map.hpp> #include <graphlab/serialization/iarchive.hpp> #include <graphlab/serialization/oarchive.hpp> #include <graphlab/serialization/iterator.hpp> namespace graphlab { namespace archive_detail { /** Serializes a map */ template <typename OutArcType, typename T, typename U> struct serialize_impl<OutArcType, boost::unordered_map<T,U>, false > { static void exec(OutArcType& oarc, const boost::unordered_map<T,U>& vec){ serialize_iterator(oarc, vec.begin(), vec.end(), vec.size()); } }; /** deserializes a map */ template <typename InArcType, typename T, typename U> struct deserialize_impl<InArcType, boost::unordered_map<T,U>, false > { static void exec(InArcType& iarc, boost::unordered_map<T,U>& vec){ vec.clear(); // get the number of elements to deserialize size_t length = 0; iarc >> length; // iterate through and send to the output iterator for (size_t x = 0; x < length ; ++x){ std::pair<T, U> v; iarc >> v; vec[v.first] = v.second; } } }; } // archive_detail } // graphlab #endif ================================================ FILE: src/graphlab/serialization/unordered_set.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SERIALIZE_UNORDERED_SET_HPP #define GRAPHLAB_SERIALIZE_UNORDERED_SET_HPP #include <boost/unordered_set.hpp> #include <graphlab/serialization/iarchive.hpp> #include <graphlab/serialization/oarchive.hpp> #include <graphlab/serialization/iterator.hpp> namespace graphlab { namespace archive_detail { /** serializes a set */ template <typename OutArcType, typename T> struct serialize_impl<OutArcType, boost::unordered_set<T>, false > { static void exec(OutArcType& oarc, const boost::unordered_set<T>& vec){ serialize_iterator(oarc, vec.begin(), vec.end(), vec.size()); } }; /** deserializes a set */ template <typename InArcType, typename T> struct deserialize_impl<InArcType, boost::unordered_set<T>, false > { static void exec(InArcType& iarc, boost::unordered_set<T>& vec){ vec.clear(); // get the number of elements to deserialize size_t length = 0; iarc >> length; // iterate through and send to the output iterator for (size_t x = 0; x < length ; ++x){ T v; iarc >> v; vec.insert(v); } } }; } // archive_detail } // graphlab #endif ================================================ FILE: src/graphlab/serialization/unsupported_serialize.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_UNSUPPORTED_SERIALIZE_HPP #define GRAPHLAB_UNSUPPORTED_SERIALIZE_HPP #include <graphlab/serialization/iarchive.hpp> #include <graphlab/serialization/oarchive.hpp> #include <graphlab/logger/logger.hpp> namespace graphlab { /** * \ingroup group_serialization * \brief Inheritting from this class will prevent the serialization * of the derived class. Used for debugging purposes. * * Inheritting from this class will result in an assertion failure * if any attempt is made to serialize or deserialize the derived * class. This is largely used for debugging purposes to enforce * that certain types are never serialized */ struct unsupported_serialize { void save(oarchive& archive) const { ASSERT_MSG(false, "trying to serialize an unserializable object"); } void load(iarchive& archive) { ASSERT_MSG(false, "trying to deserialize an unserializable object"); } }; // end of struct }; /** \ingroup group_serialization \brief A macro which disables the serialization of type so that it will fault at runtime. Writing GRAPHLAB_UNSERIALIZABLE(T) for some typename T in the global namespace will result in an assertion failure if any attempt is made to serialize or deserialize the type T. This is largely used for debugging purposes to enforce that certain types are never serialized. */ #define GRAPHLAB_UNSERIALIZABLE(tname) \ BEGIN_OUT_OF_PLACE_LOAD(arc, tname, tval) \ ASSERT_MSG(false,"trying to deserialize an unserializable object"); \ END_OUT_OF_PLACE_LOAD() \ \ BEGIN_OUT_OF_PLACE_SAVE(arc, tname, tval) \ ASSERT_MSG(false,"trying to serialize an unserializable object"); \ END_OUT_OF_PLACE_SAVE() \ #endif ================================================ FILE: src/graphlab/serialization/vector.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SERIALIZE_VECTOR_HPP #define GRAPHLAB_SERIALIZE_VECTOR_HPP #include <vector> #include <graphlab/serialization/iarchive.hpp> #include <graphlab/serialization/oarchive.hpp> #include <graphlab/serialization/iterator.hpp> namespace graphlab { namespace archive_detail { /** * We re-dispatch vectors because based on the contained type, * it is actually possible to serialize them like a POD */ template <typename OutArcType, typename ValueType, bool IsPOD> struct vector_serialize_impl { static void exec(OutArcType& oarc, const ValueType& vec) { // really this is an assert false. But the static assert // must depend on a template parameter BOOST_STATIC_ASSERT(sizeof(OutArcType) == 0); assert(false); }; }; /** * We re-dispatch vectors because based on the contained type, * it is actually possible to deserialize them like iarc POD */ template <typename InArcType, typename ValueType, bool IsPOD> struct vector_deserialize_impl { static void exec(InArcType& iarc, ValueType& vec) { // really this is an assert false. 
But the static assert // must depend on a template parameter BOOST_STATIC_ASSERT(sizeof(InArcType) == 0); assert(false); }; }; /// If contained type is not a POD use the standard serializer template <typename OutArcType, typename ValueType> struct vector_serialize_impl<OutArcType, ValueType, false > { static void exec(OutArcType& oarc, const std::vector<ValueType>& vec) { oarc << size_t(vec.size()); serialize_iterator(oarc,vec.begin(), vec.end()); } }; /// Fast vector serialization if contained type is a POD template <typename OutArcType, typename ValueType> struct vector_serialize_impl<OutArcType, ValueType, true > { static void exec(OutArcType& oarc, const std::vector<ValueType>& vec) { oarc << size_t(vec.size()); serialize(oarc, &(vec[0]),sizeof(ValueType)*vec.size()); } }; /// If contained type is not a POD use the standard deserializer template <typename InArcType, typename ValueType> struct vector_deserialize_impl<InArcType, ValueType, false > { static void exec(InArcType& iarc, std::vector<ValueType>& vec){ size_t len; iarc >> len; vec.clear(); vec.reserve(len); deserialize_iterator<InArcType, ValueType>(iarc, std::inserter(vec, vec.end())); } }; /// Fast vector deserialization if contained type is a POD template <typename InArcType, typename ValueType> struct vector_deserialize_impl<InArcType, ValueType, true > { static void exec(InArcType& iarc, std::vector<ValueType>& vec){ size_t len; iarc >> len; vec.clear(); vec.resize(len); deserialize(iarc, &(vec[0]), sizeof(ValueType)*vec.size()); } }; /** Serializes a vector */ template <typename OutArcType, typename ValueType> struct serialize_impl<OutArcType, std::vector<ValueType>, false > { static void exec(OutArcType& oarc, const std::vector<ValueType>& vec) { vector_serialize_impl<OutArcType, ValueType, gl_is_pod_or_scaler<ValueType>::value >::exec(oarc, vec); } }; /** deserializes a vector */ template <typename InArcType, typename ValueType> struct deserialize_impl<InArcType, std::vector<ValueType>, false > 
{ static void exec(InArcType& iarc, std::vector<ValueType>& vec){ vector_deserialize_impl<InArcType, ValueType, gl_is_pod_or_scaler<ValueType>::value >::exec(iarc, vec); } }; } // archive_detail } // namespace graphlab #endif ================================================ FILE: src/graphlab/ui/CMakeLists.txt ================================================ project(GraphLab) subdirs(mongoose) ================================================ FILE: src/graphlab/ui/graphlab_visualization.js ================================================ google.load("jquery", "1.5"); google.load("jqueryui", "1.7.2"); google.load("visualization", "1", {"packages":["corechart", "table", "gauge"]}); var domain_str = "http://localhost:8090"; var update_interval = 5000; var last_minutes=5; function update_domain(value) { console.log("Setting domain to " + value); domain_str = value; initiate_job_info(); initiate_aggregate_info(); initiate_node_info(); } // Start the rendering of the UI google.setOnLoadCallback(function() { initiate_job_info(); initiate_aggregate_info(); initiate_node_info(); }); function initiate_job_info() { jQuery.getJSON(domain_str + "/names.json", process_job_info) .error(function() { console.log("Unable to access " + domain_str + " will try again.");}) .complete(function() { setTimeout(initiate_job_info, update_interval); }); } function initiate_aggregate_info() { jQuery.getJSON(domain_str + "/metrics_aggregate.json?rate=1&rounding=1&tlast=" + (60*last_minutes), process_aggregate_info) .error(function() { console.log("Unable to access " + domain_str + " will try again.");}) .complete(function() { setTimeout(initiate_aggregate_info, update_interval); }); } function initiate_node_info() { jQuery.getJSON(domain_str + "/metrics_by_machine.json?rate=1&align=1&tlast=" + (60*last_minutes) , process_node_info) .error(function() { console.log("Unable to access " + domain_str + " will try again.");}) .complete(function() { setTimeout(initiate_node_info, 5000); }); } var 
job_info_data = []; function process_job_info(data) { console.log("Processing job info."); $("#program_name").text(data.program_name); // $("#nprocs").text(data.nprocs + " processes"); $("#current_time").text((data.time) + " seconds"); /* // Render all the current values var container = $("#gauges"); var sorted_metrics = data.metrics.sort(function(a,b) { return a.id - b.id; }); // Build an array of divs one for each metric with the name and value jQuery.each(sorted_metrics, function(i, metric) { var id = metric.id; var name = metric.name; var value = metric.rate_val; // if no job info has been created then create one as well as // the div to contain the display items if(job_info_data[id] == undefined) { // add a div the container var gauge_div_name = id + "_info_gauge"; var str = "<div class=\"metric_summary\" id=\"" + gauge_div_name + "\">" + "<div class=\"name\">" + name + "</div>" + "<div class=\"value\">" + value + "</div>" + "<div class=\"gauge\"></div>" + "</div>"; container.append(str); var div = container.children("#" + gauge_div_name); var gauge = new google.visualization.Gauge($(div).children(".gauge")[0]); job_info_data[id] = { div: div, gauge: gauge, options: { animation: {duration: 100, easing: "linear"}, width: 400, height: 120, min: 0, max: value + 1.0E-5}, data: google.visualization.arrayToDataTable([ ['Label', 'Value'], [name, value] ]) }; } // Get the job info var info = job_info_data[id]; info.options.max = Math.max(info.options.max, value); // info.options.min = Math.min(info.options.min, value); info.data.setCell(0,1, value); info.gauge.draw(info.data, info.options); info.div.children(".value").text(value); }); */ } var aggregate_charts = [] function process_aggregate_info(data) { console.log("Processing aggregate info."); // Render all the current values var container = $("#aggregate"); var sorted_data = data.sort(function(a,b) { return a.id - b.id; }); // Build an array of divs one for each metric with the name and value 
jQuery.each(sorted_data, function(i, metric) { var id = metric.id; var name = metric.name; var units = metric.units; if(aggregate_charts[id] == undefined) { // add a div the container var div_name = id + "_aggregate_chart"; var str = "<div class=\"aggregate\" id=\"" + div_name + "\">" + "<div class=\"name\">" + name + "</div>" + "<div class=\"chart\"></div>" + "</div>"; container.append(str); var div = $(container.children("#" + div_name)).children(".chart")[0]; aggregate_charts[id] = { div: div, options: { title: name, vAxis: {title: (units + " per Second"), titleTextStyle: {color: 'red'}, minValue: 0, maxValue: 10, viewWindow: {min: 0} }, hAxis: {title: 'Time (seconds)'}}, chart: new google.visualization.AreaChart(div), } } if(metric.record.length > 0) { // Update the chart var chart_info = aggregate_charts[id]; chart_info.data = google.visualization.arrayToDataTable( [["Time", "Value"]].concat(metric.record)); chart_info.chart.draw(chart_info.data, chart_info.options); } }); } function tensor_to_table(tensor) { var table = new google.visualization.DataTable(); table.addColumn("number", "Time"); var numLines = tensor.length; var numRows = 0; for(var i = 0; i < numLines; ++i) { table.addColumn("number", "Node " + i); } numRows = tensor[0].length; for(var i = 0; i < tensor.length; ++i) { numRows = Math.min(numRows, tensor[i].length); } table.addRows(numRows); var counter = 0; for(var i = 0; i < numLines; ++i) { for(var j = 0; j < numRows; ++j) { if (i == 0) table.setValue(j, 0, tensor[i][j][0]); if (tensor[i][j][1] >= 0) { table.setValue(j, i+1, tensor[i][j][1]); } } } return table; } var node_charts = [] function process_node_info(data) { console.log("Processing node info."); // Render all the current values var container = $("#nodes"); var sorted_data = data.sort(function(a,b) { return a.id - b.id; }); // Build an array of divs one for each metric with the name and value jQuery.each(sorted_data, function(i, metric) { var id = metric.id; var name = metric.name; 
var units = metric.units; if(node_charts[id] == undefined) { // add a div the container var div_name = id + "_node_chart"; var str = "<div class=\"node\" id=\"" + div_name + "\">" + "<div class=\"name\">" + name + "</div>" + "<div class=\"chart\"></div>" + "</div>"; container.append(str); var div = $(container.children("#" + div_name)).children(".chart")[0]; node_charts[id] = { div: div, options: { title: name, //isStacked: true, enableInteractivity: 0, vAxis: {title: (units + " per Second"), titleTextStyle: {color: 'red'}, minValue: 0, maxValue: 10, viewWindow: {min: 0} }, hAxis: {title: 'Time (seconds)'}}, chart: new google.visualization.LineChart(div), } } if(metric.record.length > 0) { // Update the chart var chart_info = node_charts[id]; chart_info.data = tensor_to_table(metric.record); chart_info.chart.draw(chart_info.data, chart_info.options); } }); } ================================================ FILE: src/graphlab/ui/index.html ================================================ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> <head> <meta http-equiv="content-type" content="text/html; charset=iso-8859-1" /> <meta name="author" content="GraphLab Dev Team" /> <meta name="publisher" content="Carnegie Mellon University" /> <meta name="copyright" content="(c) 2012. GraphLab.org" /> <meta name="distribution" content="global" /> <meta name="keywords" content="GraphLabUI" /> <meta name="description" content="GraphLab Dashboard" /> <meta name="robots" content="all" /> <title>GraphLab Dashboard

GraphLab Visualization Dashboard

================================================ FILE: src/graphlab/ui/intel_demo/graph_builder.json ================================================ { "phase_name" : "parsing", "sys_metrics" : [ { "label": "cpu", "units": "% Utilization", "id": 1, "values": [ ["time", "phase1", "phas2", "phase3"], [0, 23.4, 0, 17], [1, 2, 1, 9], [2, 0, 18, {} ], [3, 0, 2, {} ], [4, 0, 0, {}]] }, { "label": "Network", "units": "MB / Second", "id": 2, "values": [ ["time", "cpu1", "cpu2", "cpu3"], [0, 23.4, 25, 17], [1, 24, 18, 9], [2, 27, 3, 5], [3, 40, 2, 8], [4, 75, 12, 2]] }, { "label": "Disk", "units": "MB / Second", "id": 3, "values": [ ["time", "cpu1", "cpu2", "cpu3"], [0, 23.4, 25, 17], [1, 24, 18, 9], [2, 27, 3, 5], [3, 40, 2, 8], [4, 75, 12, 2]] } ] } ================================================ FILE: src/graphlab/ui/intel_demo/graphlab_visualization.js ================================================ google.load("jquery", "1.5"); google.load("jqueryui", "1.7.2"); google.load("visualization", "1", {"packages":["corechart", "table", "gauge"]}); var domain_str = "http://localhost:8090"; var update_interval = 5000; var last_minutes=5; // This function is called when the update domain button is clicked function update_domain(value) { console.log("Setting domain to " + value); domain_str = value; start(); } // Start the rendering of the UI google.setOnLoadCallback(start); // Start rendering the UI function start() { initiate_graph_builder(); initiate_job_info(); initiate_aggregate_info(); initiate_node_info(); } function initiate_job_info() { jQuery.getJSON(domain_str + "/names.json", process_job_info) .error(function() { console.log("Unable to access " + domain_str + " will try again.");}) .complete(function() { setTimeout(initiate_job_info, update_interval); }); } function initiate_aggregate_info() { jQuery.getJSON(domain_str + "/metrics_aggregate.json?rate=1&rounding=1&tlast=" + (60*last_minutes), process_aggregate_info) .error(function() { console.log("Unable to access " 
+ domain_str + " will try again.");}) .complete(function() { setTimeout(initiate_aggregate_info, update_interval); }); } function initiate_node_info() { jQuery.getJSON(domain_str + "/metrics_by_machine.json?rate=1&align=1&tlast=" + (60*last_minutes) , process_node_info) .error(function() { console.log("Unable to access " + domain_str + " will try again.");}) .complete(function() { setTimeout(initiate_node_info, 5000); }); } var job_info_data = []; function process_job_info(data) { console.log("Processing job info."); $("#program_name").text(data.program_name); // $("#nprocs").text(data.nprocs + " processes"); $("#current_time").text((data.time) + " seconds"); /* // Render all the current values var container = $("#gauges"); var sorted_metrics = data.metrics.sort(function(a,b) { return a.id - b.id; }); // Build an array of divs one for each metric with the name and value jQuery.each(sorted_metrics, function(i, metric) { var id = metric.id; var name = metric.name; var value = metric.rate_val; // if no job info has been created then create one as well as // the div to contain the display items if(job_info_data[id] == undefined) { // add a div the container var gauge_div_name = id + "_info_gauge"; var str = "
" + "
" + name + "
" + "
" + value + "
" + "
" + "
"; container.append(str); var div = container.children("#" + gauge_div_name); var gauge = new google.visualization.Gauge($(div).children(".gauge")[0]); job_info_data[id] = { div: div, gauge: gauge, options: { animation: {duration: 100, easing: "linear"}, width: 400, height: 120, min: 0, max: value + 1.0E-5}, data: google.visualization.arrayToDataTable([ ['Label', 'Value'], [name, value] ]) }; } // Get the job info var info = job_info_data[id]; info.options.max = Math.max(info.options.max, value); // info.options.min = Math.min(info.options.min, value); info.data.setCell(0,1, value); info.gauge.draw(info.data, info.options); info.div.children(".value").text(value); }); */ } var aggregate_charts = [] function process_aggregate_info(data) { console.log("Processing aggregate info."); // Render all the current values var container = $("#aggregate"); var sorted_data = data.sort(function(a,b) { return a.id - b.id; }); // Build an array of divs one for each metric with the name and value jQuery.each(sorted_data, function(i, metric) { var id = metric.id; var name = metric.name; var units = metric.units; if(aggregate_charts[id] == undefined) { // add a div the container var div_name = id + "_aggregate_chart"; var str = "
" + "
" + name + "
" + "
" + "
"; container.append(str); var div = $(container.children("#" + div_name)).children(".chart")[0]; aggregate_charts[id] = { div: div, options: { title: name, vAxis: {title: (units + " per Second"), titleTextStyle: {color: 'red'}, minValue: 0, maxValue: 10, viewWindow: {min: 0} }, hAxis: {title: 'Time (seconds)'}}, chart: new google.visualization.AreaChart(div), } } if(metric.record.length > 0) { // Update the chart var chart_info = aggregate_charts[id]; chart_info.data = google.visualization.arrayToDataTable( [["Time", "Value"]].concat(metric.record)); chart_info.chart.draw(chart_info.data, chart_info.options); } }); } function tensor_to_table(tensor) { var table = new google.visualization.DataTable(); table.addColumn("number", "Time"); var numLines = tensor.length; var numRows = 0; for(var i = 0; i < numLines; ++i) { table.addColumn("number", "Node " + i); } numRows = tensor[0].length; for(var i = 0; i < tensor.length; ++i) { numRows = Math.min(numRows, tensor[i].length); } table.addRows(numRows); var counter = 0; for(var i = 0; i < numLines; ++i) { for(var j = 0; j < numRows; ++j) { if (i == 0) table.setValue(j, 0, tensor[i][j][0]); if (tensor[i][j][1] >= 0) { table.setValue(j, i+1, tensor[i][j][1]); } } } return table; } var node_charts = [] function process_node_info(data) { console.log("Processing node info."); // Render all the current values var container = $("#nodes"); var sorted_data = data.sort(function(a,b) { return a.id - b.id; }); // Build an array of divs one for each metric with the name and value jQuery.each(sorted_data, function(i, metric) { var id = metric.id; var name = metric.name; var units = metric.units; if(node_charts[id] == undefined) { // add a div the container var div_name = id + "_node_chart"; var str = "
" + "
" + name + "
" + "
" + "
"; container.append(str); var div = $(container.children("#" + div_name)).children(".chart")[0]; node_charts[id] = { div: div, options: { title: name, //isStacked: true, enableInteractivity: 0, vAxis: {title: (units + " per Second"), titleTextStyle: {color: 'red'}, minValue: 0, maxValue: 10, viewWindow: {min: 0} }, hAxis: {title: 'Time (seconds)'}}, chart: new google.visualization.LineChart(div), } } if(metric.record.length > 0) { // Update the chart var chart_info = node_charts[id]; chart_info.data = tensor_to_table(metric.record); chart_info.chart.draw(chart_info.data, chart_info.options); } }); } function initiate_graph_builder() { jQuery.getJSON("graph_builder.json", process_graph_builder) .error(function() { console.log("Unable to access graph_builder will try again.");}) .complete(function() { setTimeout(initiate_graph_builder, update_interval); }); } var graph_builder_charts = []; function process_graph_builder(data) { // console.log(data); console.log("Processing graph builder info."); $("#graph_builder_phase_name").text(data.phase_name); // Render all the current values var container = $("#graph_builder_info"); var sorted_data = data.sys_metrics.sort(function(a,b) { return a.id - b.id; }); // Build an array of divs one for each metric with the name and value jQuery.each(sorted_data, function(i, metric) { var id = metric.id; var name = metric.label; var units = metric.units; if(graph_builder_charts[id] == undefined) { var div_name = id + "_graph_builder_chart"; var str = "
" + "
" + name + "
" + "
" + "
"; container.append(str); var div = $(container.children("#" + div_name)).children(".chart")[0]; graph_builder_charts[id] = { div: div, options: { title: name, vAxis: {title: (units + " per Second"), titleTextStyle: {color: 'red'}, minValue: 0, maxValue: 10, viewWindow: {min: 0} }, hAxis: {title: 'Time (seconds)'}}, chart: new google.visualization.AreaChart(div), } } if(metric.values.length > 0) { // Update the chart var chart_info = graph_builder_charts[id]; chart_info.data = google.visualization.arrayToDataTable(metric.values); chart_info.chart.draw(chart_info.data, chart_info.options); } }); } ================================================ FILE: src/graphlab/ui/intel_demo/index.html ================================================ GraphLab Dashboard

GraphLab Visualization Dashboard

================================================ FILE: src/graphlab/ui/intel_demo/style.css ================================================ body { text-align: center; } #instrument_panel { text-align: center; } .metric_summary { display: inline-block; } .metric_summary .name { display: none; text-align: center; } .metric_summary .value { display: none; text-align: center; } .aggregate { display: inline-block; } .aggregate .chart { width: 400px; height: 200px; } .aggregate .name { display: none } .aggregate .chart { width: 400px; height: 200px; } .node { display: inline-block; } .node .name { display: none } .node .chart { width: 400px; height: 200px; } .graph_builder { display: inline-block; } .graph_builder .chart { width: 400px; height: 200px; } .graph_builder .name { display: none } .graph_builder .chart { width: 400px; height: 200px; } ================================================ FILE: src/graphlab/ui/metrics_server.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include #include #include #include #include #include namespace graphlab { static mg_context* metric_context = NULL; static rwlock& callback_lock() { static rwlock clock; return clock; } static std::map& callbacks() { static std::map cback; return cback; } static void* process_request(enum mg_event event, struct mg_connection* conn, const struct mg_request_info* info) { if (event == MG_NEW_REQUEST) { // get the URL being requested std::string url; if (info->uri != NULL) url = info->uri; // strip the starting / if (url.length() >= 1) url = url.substr(1, url.length() - 1); // get all the variables std::map variable_map; if (info->query_string != NULL) { std::string qs = info->query_string; std::vector terms = strsplit(qs, "&", true); // now for each term.. foreach(std::string& term, terms) { // get the variable name std::vector key_val = strsplit(term, "=", true); if (key_val.size() > 0) { // use mg_get_var to read the actual variable. // since mg_get_var does http escape sequence decoding std::string key = key_val[0]; char val_target[8192]; int ret = mg_get_var(qs.c_str(), qs.length(), key.c_str(), val_target, 8192); if (ret >= 0) variable_map[key] = val_target; } } } callback_lock().readlock(); // now redirect to the callback handlers. 
if we find one std::map::iterator iter = callbacks().find(url); if (iter != callbacks().end()) { std::pair returnval = iter->second(variable_map); callback_lock().rdunlock(); std::string ctype = returnval.first; std::string body = returnval.second; mg_printf(conn, "HTTP/1.1 200 OK\r\n" "Access-Control-Allow-Origin: *\r\n" "Access-Control-Allow-Methods: GET\r\n" "Content-Type: %s\r\n" "Content-Length: %d\r\n" "\r\n", ctype.c_str(), (int) body.length()); mg_write(conn, body.c_str(), body.length()); } else { std::map::iterator iter404 = callbacks().find("404"); std::pair returnval; if (iter404 != callbacks().end()) returnval = iter404->second(variable_map); callback_lock().rdunlock(); std::string ctype = returnval.first; std::string body = returnval.second; mg_printf(conn, "HTTP/1.1 404 Not Found\r\n" "Access-Control-Allow-Origin: *\r\n" "Content-Type: %s\r\n" "Content-Length: %d\r\n" "\r\n", ctype.c_str(), (int)body.length()); mg_write(conn, body.c_str(), body.length()); } return (void*)""; } else { return NULL; } } /* Simple 404 handler. Just reuturns a string "Page Not Found" */ std::pair four_oh_four(std::map& varmap) { return std::make_pair(std::string("text/html"), std::string("Page Not Found")); } /* Echo handler. Returns a html with get keys and values */ std::pair echo(std::map& varmap) { std::stringstream ret; std::map::iterator iter = varmap.begin(); ret << "\n"; while (iter != varmap.end()) { ret << iter->first << " = " << iter->second << "
\n"; ++iter; } ret << "\n"; ret.flush(); return std::make_pair(std::string("text/html"), ret.str()); } std::pair index_page(std::map& varmap) { std::stringstream ret; ret << "\n"; ret << "

Registered Handlers:

\n"; callback_lock().readlock(); std::map::const_iterator iter = callbacks().begin(); while (iter != callbacks().end()) { // don't put in the index page callback if (iter->first != "") { ret << iter->first << "
\n"; } ++iter; } callback_lock().rdunlock(); ret << "\n"; ret.flush(); return std::make_pair(std::string("text/html"), ret.str()); } static void fill_builtin_callbacks() { callbacks()["404"] = four_oh_four; callbacks()["echo"] = echo; callbacks()[""] = index_page; callbacks()["index.html"] = index_page; } void add_metric_server_callback(std::string page, http_redirect_callback_type callback) { callback_lock().writelock(); callbacks()[page] = callback; callback_lock().wrunlock(); } void launch_metric_server() { if (distributed_control::get_instance_procid() == 0) { const char *options[] = {"listening_ports", "8090", NULL}; metric_context = mg_start(process_request, (void*)(&(callbacks())), options); if(metric_context == NULL) { logstream(LOG_ERROR) << "Unable to launch metrics server on port 8090. " << "Metrics server will not be available" << std::endl; return; } fill_builtin_callbacks(); char hostname[1024]; std::string strhostname; if (gethostname(hostname, 1024) == 0) strhostname = hostname; logstream(LOG_EMPH) << "Metrics server now listening on " << "http://" << strhostname << ":8090" << std::endl; } } void stop_metric_server() { if (distributed_control::get_instance_procid() == 0 && metric_context != NULL) { std::cout << "Metrics server stopping." << std::endl; mg_stop(metric_context); } } void stop_metric_server_on_eof() { if (distributed_control::get_instance_procid() == 0 && metric_context != NULL) { char buff[128]; // wait for ctrl-d logstream(LOG_EMPH) << "Hit Ctrl-D to stop the metrics server" << std::endl; while (fgets(buff, 128, stdin) != NULL ); stop_metric_server(); } } } // namespace graphlab ================================================ FILE: src/graphlab/ui/metrics_server.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_METRICS_SERVER_HPP #define GRAPHLAB_METRICS_SERVER_HPP #include #include #include #include namespace graphlab { /** \ingroup httpserver The callback type used for add_metric_server_callback() See add_metric_server_callback() for details. */ typedef boost::function (std::map&)> http_redirect_callback_type; /** \ingroup httpserver \brief This is used to map a URL on the mtrics server to a processing function. The processing function must have the prototype \code std::pair callback(std::map&) \endcode The processing function takes a map of GET variables to their corresponding values, and returns a pair of strings. (content_type, content) \li \c content type is the http content_type header. For instance text/plain or text/html. \li \c content is the actual body For instance: The builtin 404 handler looks like this: \code std::pair four_oh_four(std::map& varmap) { return std::make_pair(std::string("text/html"), std::string("Page Not Found")); } \endcode \note The callbacks are only processed on machine 0 since only machine 0 launches the server. \param page The page to map. For instance page = "a.html" will be shown on http://[server]/a.html \param callback The callback function to use to process the page */ void add_metric_server_callback(std::string page, http_redirect_callback_type callback); /** \ingroup httpserver \brief Starts the metrics reporting server. The function may be called by all machines simultaneously since it only does useful work on machine 0. 
Only machine 0 will launch the web server. */ void launch_metric_server(); /** \ingroup httpserver \brief Stops the metrics reporting server if one is started. The function may be called by all machines simultaneously since it only does useful work on machine 0. */ void stop_metric_server(); /** \ingroup httpserver \brief Waits for a ctrl-D on machine 0, and stops the metrics reporting server if one is started. The function may be called by all machines simultaneously since it only does useful work on machine 0. It waits for the stdin stream to close (when the user hit ctrl-d), then shuts down the server. */ void stop_metric_server_on_eof(); } // graphlab #endif // GRAPHLAB_METRICS_SERVER_HPP ================================================ FILE: src/graphlab/ui/mongoose/CMakeLists.txt ================================================ project(GraphLab) ================================================ FILE: src/graphlab/ui/mongoose/LICENSE ================================================ Copyright (c) 2004-2010 Sergey Lyubka Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: src/graphlab/ui/mongoose/mongoose.cpp ================================================ // Copyright (c) 2004-2011 Sergey Lyubka // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
#define NO_SSL #define NO_CGI #if defined(_WIN32) #define _CRT_SECURE_NO_WARNINGS // Disable deprecation warning in VS2005 #else #define _XOPEN_SOURCE 600 // For flockfile() on Linux #define _LARGEFILE_SOURCE // Enable 64-bit file offsets #define __STDC_FORMAT_MACROS // wants this for C++ #define __STDC_LIMIT_MACROS // C++ wants that for INT64_MAX #endif #if defined(__SYMBIAN32__) #define NO_SSL // SSL is not supported #define NO_CGI // CGI is not supported #define PATH_MAX FILENAME_MAX #endif // __SYMBIAN32__ #ifndef _WIN32_WCE // Some ANSI #includes are not available on Windows CE #include #include #include #include #include #endif // !_WIN32_WCE #include #include #include #include #include #include #include #include #include #if defined(_WIN32) && !defined(__SYMBIAN32__) // Windows specific #define _WIN32_WINNT 0x0400 // To make it link in VS2005 #include #ifndef PATH_MAX #define PATH_MAX MAX_PATH #endif #ifndef _WIN32_WCE #include #include #include #else // _WIN32_WCE #include #define NO_CGI // WinCE has no pipes typedef long off_t; #define BUFSIZ 4096 #define errno GetLastError() #define strerror(x) _ultoa(x, (char *) _alloca(sizeof(x) *3 ), 10) #endif // _WIN32_WCE #define MAKEUQUAD(lo, hi) ((uint64_t)(((uint32_t)(lo)) | \ ((uint64_t)((uint32_t)(hi))) << 32)) #define RATE_DIFF 10000000 // 100 nsecs #define EPOCH_DIFF MAKEUQUAD(0xd53e8000, 0x019db1de) #define SYS2UNIX_TIME(lo, hi) \ (time_t) ((MAKEUQUAD((lo), (hi)) - EPOCH_DIFF) / RATE_DIFF) // Visual Studio 6 does not know __func__ or __FUNCTION__ // The rest of MS compilers use __FUNCTION__, not C99 __func__ // Also use _strtoui64 on modern M$ compilers #if defined(_MSC_VER) && _MSC_VER < 1300 #define STRX(x) #x #define STR(x) STRX(x) #define __func__ "line " STR(__LINE__) #define strtoull(x, y, z) strtoul(x, y, z) #define strtoll(x, y, z) strtol(x, y, z) #else #define __func__ __FUNCTION__ #define strtoull(x, y, z) _strtoui64(x, y, z) #define strtoll(x, y, z) _strtoi64(x, y, z) #endif // _MSC_VER #define 
ERRNO GetLastError() #define NO_SOCKLEN_T #define SSL_LIB "ssleay32.dll" #define CRYPTO_LIB "libeay32.dll" #define DIRSEP '\\' #define IS_DIRSEP_CHAR(c) ((c) == '/' || (c) == '\\') #define O_NONBLOCK 0 #if !defined(EWOULDBLOCK) #define EWOULDBLOCK WSAEWOULDBLOCK #endif // !EWOULDBLOCK #define _POSIX_ #define INT64_FMT "I64d" #define WINCDECL __cdecl #define SHUT_WR 1 #define snprintf _snprintf #define vsnprintf _vsnprintf #define sleep(x) Sleep((x) * 1000) #define pipe(x) _pipe(x, BUFSIZ, _O_BINARY) #define popen(x, y) _popen(x, y) #define pclose(x) _pclose(x) #define close(x) _close(x) #define dlsym(x,y) GetProcAddress((HINSTANCE) (x), (y)) #define RTLD_LAZY 0 #define fseeko(x, y, z) fseek((x), (y), (z)) #define fdopen(x, y) _fdopen((x), (y)) #define write(x, y, z) _write((x), (y), (unsigned) z) #define read(x, y, z) _read((x), (y), (unsigned) z) #define flockfile(x) EnterCriticalSection(&global_log_file_lock) #define funlockfile(x) LeaveCriticalSection(&global_log_file_lock) #if !defined(fileno) #define fileno(x) _fileno(x) #endif // !fileno MINGW #defines fileno typedef HANDLE pthread_mutex_t; typedef struct {HANDLE signal, broadcast;} pthread_cond_t; typedef DWORD pthread_t; #define pid_t HANDLE // MINGW typedefs pid_t to int. Using #define here. 
struct timespec { long tv_nsec; long tv_sec; }; static int pthread_mutex_lock(pthread_mutex_t *); static int pthread_mutex_unlock(pthread_mutex_t *); static FILE *mg_fopen(const char *path, const char *mode); #if defined(HAVE_STDINT) #include #else typedef unsigned int uint32_t; typedef unsigned short uint16_t; typedef unsigned __int64 uint64_t; typedef __int64 int64_t; #define INT64_MAX 9223372036854775807 #endif // HAVE_STDINT // POSIX dirent interface struct dirent { char d_name[PATH_MAX]; }; typedef struct DIR { HANDLE handle; WIN32_FIND_DATAW info; struct dirent result; } DIR; #else // UNIX specific #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__MACH__) #define SSL_LIB "libssl.dylib" #define CRYPTO_LIB "libcrypto.dylib" #else #if !defined(SSL_LIB) #define SSL_LIB "libssl.so" #endif #if !defined(CRYPTO_LIB) #define CRYPTO_LIB "libcrypto.so" #endif #endif #define DIRSEP '/' #define IS_DIRSEP_CHAR(c) ((c) == '/') #ifndef O_BINARY #define O_BINARY 0 #endif // O_BINARY #define closesocket(a) close(a) #define mg_fopen(x, y) fopen(x, y) #define mg_mkdir(x, y) mkdir(x, y) #define mg_remove(x) remove(x) #define mg_rename(x, y) rename(x, y) #define ERRNO errno #define INVALID_SOCKET (-1) #define INT64_FMT PRId64 typedef int SOCKET; #define WINCDECL #endif // End of Windows and UNIX specific includes #include "mongoose.h" #define MONGOOSE_VERSION "3.1" #define PASSWORDS_FILE_NAME ".htpasswd" #define CGI_ENVIRONMENT_SIZE 4096 #define MAX_CGI_ENVIR_VARS 64 #define ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0])) #ifdef _WIN32 static CRITICAL_SECTION global_log_file_lock; static pthread_t pthread_self(void) { return GetCurrentThreadId(); } #endif // _WIN32 #if defined(DEBUG) #define DEBUG_TRACE(x) do { \ flockfile(stdout); \ printf("*** %lu.%p.%s.%d: ", \ (unsigned long) time(NULL), (void *) pthread_self(), \ __func__, __LINE__); \ printf x; \ putchar('\n'); \ fflush(stdout); \ 
funlockfile(stdout); \ } while (0) #else #define DEBUG_TRACE(x) #endif // DEBUG // Darwin prior to 7.0 and Win32 do not have socklen_t #ifdef NO_SOCKLEN_T typedef int socklen_t; #endif // NO_SOCKLEN_T #if !defined(MSG_NOSIGNAL) #define MSG_NOSIGNAL 0 #endif typedef void * (*mg_thread_func_t)(void *); static const char *http_500_error = "Internal Server Error"; // Snatched from OpenSSL includes. I put the prototypes here to be independent // from the OpenSSL source installation. Having this, mongoose + SSL can be // built on any system with binary SSL libraries installed. typedef struct ssl_st SSL; typedef struct ssl_method_st SSL_METHOD; typedef struct ssl_ctx_st SSL_CTX; #define SSL_ERROR_WANT_READ 2 #define SSL_ERROR_WANT_WRITE 3 #define SSL_FILETYPE_PEM 1 #define CRYPTO_LOCK 1 #if defined(NO_SSL_DL) extern void SSL_free(SSL *); extern int SSL_accept(SSL *); extern int SSL_connect(SSL *); extern int SSL_read(SSL *, void *, int); extern int SSL_write(SSL *, const void *, int); extern int SSL_get_error(const SSL *, int); extern int SSL_set_fd(SSL *, int); extern SSL *SSL_new(SSL_CTX *); extern SSL_CTX *SSL_CTX_new(SSL_METHOD *); extern SSL_METHOD *SSLv23_server_method(void); extern int SSL_library_init(void); extern void SSL_load_error_strings(void); extern int SSL_CTX_use_PrivateKey_file(SSL_CTX *, const char *, int); extern int SSL_CTX_use_certificate_file(SSL_CTX *, const char *, int); extern int SSL_CTX_use_certificate_chain_file(SSL_CTX *, const char *); extern void SSL_CTX_set_default_passwd_cb(SSL_CTX *, mg_callback_t); extern void SSL_CTX_free(SSL_CTX *); extern unsigned long ERR_get_error(void); extern char *ERR_error_string(unsigned long, char *); extern int CRYPTO_num_locks(void); extern void CRYPTO_set_locking_callback(void (*)(int, int, const char *, int)); extern void CRYPTO_set_id_callback(unsigned long (*)(void)); #else // Dynamically loaded SSL functionality struct ssl_func { const char *name; // SSL function name void (*ptr)(void); // Function 
pointer }; #define SSL_free (* (void (*)(SSL *)) ssl_sw[0].ptr) #define SSL_accept (* (int (*)(SSL *)) ssl_sw[1].ptr) #define SSL_connect (* (int (*)(SSL *)) ssl_sw[2].ptr) #define SSL_read (* (int (*)(SSL *, void *, int)) ssl_sw[3].ptr) #define SSL_write (* (int (*)(SSL *, const void *,int)) ssl_sw[4].ptr) #define SSL_get_error (* (int (*)(SSL *, int)) ssl_sw[5].ptr) #define SSL_set_fd (* (int (*)(SSL *, SOCKET)) ssl_sw[6].ptr) #define SSL_new (* (SSL * (*)(SSL_CTX *)) ssl_sw[7].ptr) #define SSL_CTX_new (* (SSL_CTX * (*)(SSL_METHOD *)) ssl_sw[8].ptr) #define SSLv23_server_method (* (SSL_METHOD * (*)(void)) ssl_sw[9].ptr) #define SSL_library_init (* (int (*)(void)) ssl_sw[10].ptr) #define SSL_CTX_use_PrivateKey_file (* (int (*)(SSL_CTX *, \ const char *, int)) ssl_sw[11].ptr) #define SSL_CTX_use_certificate_file (* (int (*)(SSL_CTX *, \ const char *, int)) ssl_sw[12].ptr) #define SSL_CTX_set_default_passwd_cb \ (* (void (*)(SSL_CTX *, mg_callback_t)) ssl_sw[13].ptr) #define SSL_CTX_free (* (void (*)(SSL_CTX *)) ssl_sw[14].ptr) #define SSL_load_error_strings (* (void (*)(void)) ssl_sw[15].ptr) #define SSL_CTX_use_certificate_chain_file \ (* (int (*)(SSL_CTX *, const char *)) ssl_sw[16].ptr) #define CRYPTO_num_locks (* (int (*)(void)) crypto_sw[0].ptr) #define CRYPTO_set_locking_callback \ (* (void (*)(void (*)(int, int, const char *, int))) crypto_sw[1].ptr) #define CRYPTO_set_id_callback \ (* (void (*)(unsigned long (*)(void))) crypto_sw[2].ptr) #define ERR_get_error (* (unsigned long (*)(void)) crypto_sw[3].ptr) #define ERR_error_string (* (char * (*)(unsigned long,char *)) crypto_sw[4].ptr) // set_ssl_option() function updates this array. // It loads SSL library dynamically and changes NULLs to the actual addresses // of respective functions. The macros above (like SSL_connect()) are really // just calling these functions indirectly via the pointer. 
// Table of SSL library symbols resolved at runtime by set_ssl_option().
// NOTE: the order of entries is load-bearing — the SSL_* wrapper macros
// above index this array by position (ssl_sw[0] ... ssl_sw[16]).
// Do not reorder or insert entries without updating those macros.
static struct ssl_func ssl_sw[] = {
  {"SSL_free", NULL},
  {"SSL_accept", NULL},
  {"SSL_connect", NULL},
  {"SSL_read", NULL},
  {"SSL_write", NULL},
  {"SSL_get_error", NULL},
  {"SSL_set_fd", NULL},
  {"SSL_new", NULL},
  {"SSL_CTX_new", NULL},
  {"SSLv23_server_method", NULL},
  {"SSL_library_init", NULL},
  {"SSL_CTX_use_PrivateKey_file", NULL},
  {"SSL_CTX_use_certificate_file",NULL},
  {"SSL_CTX_set_default_passwd_cb",NULL},
  {"SSL_CTX_free", NULL},
  {"SSL_load_error_strings", NULL},
  {"SSL_CTX_use_certificate_chain_file", NULL},
  {NULL, NULL}
};

#ifndef NO_SSL
// Similar array as ssl_sw. These functions could be located in different lib.
// Same ordering constraint: the CRYPTO_*/ERR_* macros index by position.
static struct ssl_func crypto_sw[] = {
  {"CRYPTO_num_locks", NULL},
  {"CRYPTO_set_locking_callback", NULL},
  {"CRYPTO_set_id_callback", NULL},
  {"ERR_get_error", NULL},
  {"ERR_error_string", NULL},
  {NULL, NULL}
};
#endif
#endif // NO_SSL_DL

// Abbreviated month names used when formatting/parsing HTTP dates.
static const char *month_names[] = {
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};

// Unified socket address. For IPv6 support, add IPv6 address structure
// in the union u.
union usa {
  struct sockaddr sa;
  struct sockaddr_in sin;
#if defined(USE_IPV6)
  struct sockaddr_in6 sin6;
#endif
};

// Describes a string (chunk of memory). Not necessarily NUL-terminated.
struct vec {
  const char *ptr;
  size_t len;
};

// Structure used by mg_stat() function. Uses 64 bit file length.
struct mgstat {
  int is_directory;  // Directory marker
  int64_t size;      // File size
  time_t mtime;      // Modification time
};

// Describes listening socket, or socket which was accept()-ed by the master
// thread and queued for future handling by the worker thread.
struct socket {
  struct socket *next;  // Linkage
  SOCKET sock;          // Listening socket
  union usa lsa;        // Local socket address
  union usa rsa;        // Remote socket address
  int is_ssl;           // Is socket SSL-ed
};

// Indexes into config_options[] / mg_context::config[]. Must stay in the
// same order as the triplets in config_options below.
enum {
  CGI_EXTENSIONS, CGI_ENVIRONMENT, PUT_DELETE_PASSWORDS_FILE, CGI_INTERPRETER,
  PROTECT_URI, AUTHENTICATION_DOMAIN, SSI_EXTENSIONS, ACCESS_LOG_FILE,
  SSL_CHAIN_FILE, ENABLE_DIRECTORY_LISTING, ERROR_LOG_FILE,
  GLOBAL_PASSWORDS_FILE, INDEX_FILES, ENABLE_KEEP_ALIVE, ACCESS_CONTROL_LIST,
  MAX_REQUEST_SIZE, EXTRA_MIME_TYPES, LISTENING_PORTS, DOCUMENT_ROOT,
  SSL_CERTIFICATE, NUM_THREADS, RUN_AS_USER, REWRITE,
  NUM_OPTIONS
};

// Triplets of {short option name, long option name, default value}.
// A NULL default means the option is unset. Terminated by a single NULL.
static const char *config_options[] = {
  "C", "cgi_pattern", "**.cgi$|**.pl$|**.php$",
  "E", "cgi_environment", NULL,
  "G", "put_delete_passwords_file", NULL,
  "I", "cgi_interpreter", NULL,
  "P", "protect_uri", NULL,
  "R", "authentication_domain", "mydomain.com",
  "S", "ssi_pattern", "**.shtml$|**.shtm$",
  "a", "access_log_file", NULL,
  "c", "ssl_chain_file", NULL,
  "d", "enable_directory_listing", "yes",
  "e", "error_log_file", NULL,
  "g", "global_passwords_file", NULL,
  "i", "index_files", "index.html,index.htm,index.cgi",
  "k", "enable_keep_alive", "yes",
  "l", "access_control_list", NULL,
  "M", "max_request_size", "16384",
  "m", "extra_mime_types", NULL,
  "p", "listening_ports", "8080",
  "r", "document_root", ".",
  "s", "ssl_certificate", NULL,
  "t", "num_threads", "10",
  "u", "run_as_user", NULL,
  "w", "url_rewrite_patterns", NULL,
  NULL
};
#define ENTRIES_PER_CONFIG_OPTION 3

// Per-server state, shared by all worker threads.
struct mg_context {
  volatile int stop_flag;       // Should we stop event loop
  SSL_CTX *ssl_ctx;             // SSL context
  char *config[NUM_OPTIONS];    // Mongoose configuration parameters
  mg_callback_t user_callback;  // User-defined callback function
  void *user_data;              // User-defined data

  struct socket *listening_sockets;

  volatile int num_threads;     // Number of threads
  pthread_mutex_t mutex;        // Protects (max|num)_threads
  pthread_cond_t cond;          // Condvar for tracking workers terminations

  struct socket queue[20];      // Accepted sockets
  volatile int sq_head;         // Head of the socket queue
  volatile int sq_tail;         // Tail of the socket queue
  pthread_cond_t sq_full;       // Signaled when socket is produced
  pthread_cond_t sq_empty;      // Signaled when socket is consumed
};

// Per-connection state, owned by a single worker thread at a time.
struct mg_connection {
  struct mg_request_info request_info;
  struct mg_context *ctx;
  SSL *ssl;                   // SSL descriptor
  struct socket client;       // Connected client
  time_t birth_time;          // Time connection was accepted
  int64_t num_bytes_sent;     // Total bytes sent to client
  int64_t content_len;        // Content-Length header value
  int64_t consumed_content;   // How many bytes of content is already read
  char *buf;                  // Buffer for received data
  char *path_info;            // PATH_INFO part of the URL
  int must_close;             // 1 if connection must be closed
  int buf_size;               // Buffer size
  int request_len;            // Size of the request + headers in a buffer
  int data_len;               // Total size of data in a buffer
};

// Public API: expose the option-name/default triplets to the embedder.
const char **mg_get_valid_option_names(void) {
  return config_options;
}

// Invoke the user callback (if any) for the given event, after refreshing
// request_info.user_data from the context.
static void *call_user(struct mg_connection *conn, enum mg_event event) {
  conn->request_info.user_data = conn->ctx->user_data;
  return conn->ctx->user_callback == NULL ? NULL :
    conn->ctx->user_callback(event, conn, &conn->request_info);
}

// Map a short or long option name to its index, or -1 if unknown.
static int get_option_index(const char *name) {
  int i;
  for (i = 0; config_options[i] != NULL; i += ENTRIES_PER_CONFIG_OPTION) {
    if (strcmp(config_options[i], name) == 0 ||
        strcmp(config_options[i + 1], name) == 0) {
      return i / ENTRIES_PER_CONFIG_OPTION;
    }
  }
  return -1;
}

// Public API: NULL means "no such option", "" means "option is unset".
const char *mg_get_option(const struct mg_context *ctx, const char *name) {
  int i;
  if ((i = get_option_index(name)) == -1) {
    return NULL;
  } else if (ctx->config[i] == NULL) {
    return "";
  } else {
    return ctx->config[i];
  }
}

// Render a socket address as text into buf. On plain Win32 (no inet_ntop)
// falls back to IPv4-only inet_ntoa.
static void sockaddr_to_string(char *buf, size_t len, const union usa *usa) {
  buf[0] = '\0';
#if defined(USE_IPV6)
  inet_ntop(usa->sa.sa_family, usa->sa.sa_family == AF_INET ?
            (void *) &usa->sin.sin_addr :
            (void *) &usa->sin6.sin6_addr, buf, len);
#elif defined(_WIN32)
  // Only Windoze Vista (and newer) have inet_ntop()
  strncpy(buf, inet_ntoa(usa->sin.sin_addr), len);
#else
  inet_ntop(usa->sa.sa_family, (void *) &usa->sin.sin_addr, buf, len);
#endif
}

// Print error message to the opened error log stream. The user callback
// (MG_EVENT_LOG) gets first refusal; otherwise the message is appended to
// the configured error_log_file, if any.
static void cry(struct mg_connection *conn, const char *fmt, ...) {
  char buf[BUFSIZ], src_addr[20];
  va_list ap;
  FILE *fp;
  time_t timestamp;

  va_start(ap, fmt);
  (void) vsnprintf(buf, sizeof(buf), fmt, ap);
  va_end(ap);

  // Do not lock when getting the callback value, here and below.
  // I suppose this is fine, since function cannot disappear in the
  // same way string option can.
  conn->request_info.log_message = buf;
  if (call_user(conn, MG_EVENT_LOG) == NULL) {
    fp = conn->ctx->config[ERROR_LOG_FILE] == NULL ? NULL :
      mg_fopen(conn->ctx->config[ERROR_LOG_FILE], "a+");

    if (fp != NULL) {
      flockfile(fp);
      timestamp = time(NULL);

      sockaddr_to_string(src_addr, sizeof(src_addr), &conn->client.rsa);
      fprintf(fp, "[%010lu] [error] [client %s] ", (unsigned long) timestamp,
              src_addr);

      if (conn->request_info.request_method != NULL) {
        fprintf(fp, "%s %s: ", conn->request_info.request_method,
                conn->request_info.uri);
      }

      (void) fprintf(fp, "%s", buf);
      fputc('\n', fp);
      funlockfile(fp);
      // NOTE(review): fp comes from mg_fopen above, so comparing against
      // stderr looks like dead defensiveness — confirm before removing.
      if (fp != stderr) {
        fclose(fp);
      }
    }
  }
  conn->request_info.log_message = NULL;
}

#ifndef NO_SSL
// Return OpenSSL error message for the most recent error, or "".
static const char *ssl_error(void) {
  unsigned long err;
  err = ERR_get_error();
  return err == 0 ? "" : ERR_error_string(err, NULL);
}
#endif

// Return fake connection structure. Used for logging, if connection
// is not applicable at the moment of logging.
static struct mg_connection *fc(struct mg_context *ctx) { static struct mg_connection fake_connection; fake_connection.ctx = ctx; return &fake_connection; } const char *mg_version(void) { return MONGOOSE_VERSION; } static void mg_strlcpy(register char *dst, register const char *src, size_t n) { for (; *src != '\0' && n > 1; n--) { *dst++ = *src++; } *dst = '\0'; } static int lowercase(const char *s) { return tolower(* (const unsigned char *) s); } static int mg_strncasecmp(const char *s1, const char *s2, size_t len) { int diff = 0; if (len > 0) do { diff = lowercase(s1++) - lowercase(s2++); } while (diff == 0 && s1[-1] != '\0' && --len > 0); return diff; } static int mg_strcasecmp(const char *s1, const char *s2) { int diff; do { diff = lowercase(s1++) - lowercase(s2++); } while (diff == 0 && s1[-1] != '\0'); return diff; } static char * mg_strndup(const char *ptr, size_t len) { char *p; if ((p = (char *) malloc(len + 1)) != NULL) { mg_strlcpy(p, ptr, len + 1); } return p; } static char * mg_strdup(const char *str) { return mg_strndup(str, strlen(str)); } // Like snprintf(), but never returns negative value, or the value // that is larger than a supplied buffer. // Thanks to Adam Zeldis to pointing snprintf()-caused vulnerability // in his audit report. static int mg_vsnprintf(struct mg_connection *conn, char *buf, size_t buflen, const char *fmt, va_list ap) { int n; if (buflen == 0) return 0; n = vsnprintf(buf, buflen, fmt, ap); if (n < 0) { cry(conn, "vsnprintf error"); n = 0; } else if (n >= (int) buflen) { cry(conn, "truncating vsnprintf buffer: [%.*s]", n > 200 ? 200 : n, buf); n = (int) buflen - 1; } buf[n] = '\0'; return n; } static int mg_snprintf(struct mg_connection *conn, char *buf, size_t buflen, const char *fmt, ...) { va_list ap; int n; va_start(ap, fmt); n = mg_vsnprintf(conn, buf, buflen, fmt, ap); va_end(ap); return n; } // Skip the characters until one of the delimiters characters found. // 0-terminate resulting word. 
Skip the delimiter and following whitespaces if any. // Advance pointer to buffer to the next word. Return found 0-terminated word. // Delimiters can be quoted with quotechar. static char *skip_quoted(char **buf, const char *delimiters, const char *whitespace, char quotechar) { char *p, *begin_word, *end_word, *end_whitespace; begin_word = *buf; end_word = begin_word + strcspn(begin_word, delimiters); // Check for quotechar if (end_word > begin_word) { p = end_word - 1; while (*p == quotechar) { // If there is anything beyond end_word, copy it if (*end_word == '\0') { *p = '\0'; break; } else { size_t end_off = strcspn(end_word + 1, delimiters); memmove (p, end_word, end_off + 1); p += end_off; // p must correspond to end_word - 1 end_word += end_off + 1; } } for (p++; p < end_word; p++) { *p = '\0'; } } if (*end_word == '\0') { *buf = end_word; } else { end_whitespace = end_word + 1 + strspn(end_word + 1, whitespace); for (p = end_word; p < end_whitespace; p++) { *p = '\0'; } *buf = end_whitespace; } return begin_word; } // Simplified version of skip_quoted without quote char // and whitespace == delimiters static char *skip(char **buf, const char *delimiters) { return skip_quoted(buf, delimiters, delimiters, 0); } // Return HTTP header value, or NULL if not found. static const char *get_header(const struct mg_request_info *ri, const char *name) { int i; for (i = 0; i < ri->num_headers; i++) if (!mg_strcasecmp(name, ri->http_headers[i].name)) return ri->http_headers[i].value; return NULL; } const char *mg_get_header(const struct mg_connection *conn, const char *name) { return get_header(&conn->request_info, name); } // A helper function for traversing comma separated list of values. // It returns a list pointer shifted to the next value, of NULL if the end // of the list found. // Value is stored in val vector. If value has form "x=y", then eq_val // vector is initialized to point to the "y" part, and val vector length // is adjusted to point only to "x". 
// Walk a comma-separated list: fill val with the next item and return a
// pointer past it (NULL at end). If the item has the form "x=y" and eq_val
// is non-NULL, val is shrunk to "x" and eq_val set to "y".
static const char *next_option(const char *list, struct vec *val,
                               struct vec *eq_val) {
  if (list == NULL || *list == '\0') {
    // End of the list
    list = NULL;
  } else {
    val->ptr = list;
    if ((list = strchr(val->ptr, ',')) != NULL) {
      // Comma found. Store length and shift the list ptr
      val->len = list - val->ptr;
      list++;
    } else {
      // This value is the last one
      list = val->ptr + strlen(val->ptr);
      val->len = list - val->ptr;
    }

    if (eq_val != NULL) {
      // Value has form "x=y", adjust pointers and lengths
      // so that val points to "x", and eq_val points to "y".
      eq_val->len = 0;
      eq_val->ptr = (const char *) memchr(val->ptr, '=', val->len);
      if (eq_val->ptr != NULL) {
        eq_val->ptr++;  // Skip over '=' character
        eq_val->len = val->ptr + val->len - eq_val->ptr;
        val->len = (eq_val->ptr - val->ptr) - 1;
      }
    }
  }

  return list;
}

// Match str against a glob-like pattern of length pattern_len.
// Supported metacharacters: '?' (any one char), '*' (any run excluding '/'),
// '**' (any run including '/'), '$' (end of string), '|' (alternation —
// handled by recursing on each alternative).
// Returns the number of matched characters, or -1 on mismatch.
// The '*' branch backtracks: it tries the longest run first (len) and
// shrinks until the rest of the pattern matches.
static int match_prefix(const char *pattern, int pattern_len,
                        const char *str) {
  const char *or_str;
  int i, j, len, res;

  // Alternation: split on the first '|' and try each side.
  if ((or_str = (const char *) memchr(pattern, '|', pattern_len)) != NULL) {
    res = match_prefix(pattern, or_str - pattern, str);
    return res > 0 ? res :
      match_prefix(or_str + 1, (pattern + pattern_len) - (or_str + 1), str);
  }

  i = j = 0;
  res = -1;
  for (; i < pattern_len; i++, j++) {
    if (pattern[i] == '?' && str[j] != '\0') {
      continue;
    } else if (pattern[i] == '$') {
      return str[j] == '\0' ? j : -1;
    } else if (pattern[i] == '*') {
      i++;
      if (pattern[i] == '*') {
        // '**' also crosses '/' boundaries
        i++;
        len = strlen(str + j);
      } else {
        len = strcspn(str + j, "/");
      }
      if (i == pattern_len) {
        return j + len;
      }
      do {
        res = match_prefix(pattern + i, pattern_len - i, str + j + len);
      } while (res == -1 && len-- > 0);
      return res == -1 ? -1 : j + res + len;
    } else if (pattern[i] != str[j]) {
      return -1;
    }
  }
  return j;
}

// HTTP 1.1 assumes keep alive if "Connection:" header is not set
// This function must tolerate situations when connection info is not
// set up, for example if request parsing failed.
static int should_keep_alive(const struct mg_connection *conn) { const char *http_version = conn->request_info.http_version; const char *header = mg_get_header(conn, "Connection"); return (!conn->must_close && !conn->request_info.status_code != 401 && !mg_strcasecmp(conn->ctx->config[ENABLE_KEEP_ALIVE], "yes") && (header == NULL && http_version && !strcmp(http_version, "1.1"))) || (header != NULL && !mg_strcasecmp(header, "keep-alive")); } static const char *suggest_connection_header(const struct mg_connection *conn) { return should_keep_alive(conn) ? "keep-alive" : "close"; } static void send_http_error(struct mg_connection *conn, int status, const char *reason, const char *fmt, ...) { char buf[BUFSIZ]; va_list ap; int len; conn->request_info.status_code = status; if (call_user(conn, MG_HTTP_ERROR) == NULL) { buf[0] = '\0'; len = 0; // Errors 1xx, 204 and 304 MUST NOT send a body if (status > 199 && status != 204 && status != 304) { len = mg_snprintf(conn, buf, sizeof(buf), "Error %d: %s", status, reason); cry(conn, "%s", buf); buf[len++] = '\n'; va_start(ap, fmt); len += mg_vsnprintf(conn, buf + len, sizeof(buf) - len, fmt, ap); va_end(ap); } DEBUG_TRACE(("[%s]", buf)); mg_printf(conn, "HTTP/1.1 %d %s\r\n" "Content-Type: text/plain\r\n" "Content-Length: %d\r\n" "Connection: %s\r\n\r\n", status, reason, len, suggest_connection_header(conn)); conn->num_bytes_sent += mg_printf(conn, "%s", buf); } } #if defined(_WIN32) && !defined(__SYMBIAN32__) static int pthread_mutex_init(pthread_mutex_t *mutex, void *unused) { unused = NULL; *mutex = CreateMutex(NULL, FALSE, NULL); return *mutex == NULL ? -1 : 0; } static int pthread_mutex_destroy(pthread_mutex_t *mutex) { return CloseHandle(*mutex) == 0 ? -1 : 0; } static int pthread_mutex_lock(pthread_mutex_t *mutex) { return WaitForSingleObject(*mutex, INFINITE) == WAIT_OBJECT_0? 0 : -1; } static int pthread_mutex_unlock(pthread_mutex_t *mutex) { return ReleaseMutex(*mutex) == 0 ? 
-1 : 0; } static int pthread_cond_init(pthread_cond_t *cv, const void *unused) { unused = NULL; cv->signal = CreateEvent(NULL, FALSE, FALSE, NULL); cv->broadcast = CreateEvent(NULL, TRUE, FALSE, NULL); return cv->signal != NULL && cv->broadcast != NULL ? 0 : -1; } static int pthread_cond_wait(pthread_cond_t *cv, pthread_mutex_t *mutex) { HANDLE handles[] = {cv->signal, cv->broadcast}; ReleaseMutex(*mutex); WaitForMultipleObjects(2, handles, FALSE, INFINITE); return WaitForSingleObject(*mutex, INFINITE) == WAIT_OBJECT_0? 0 : -1; } static int pthread_cond_signal(pthread_cond_t *cv) { return SetEvent(cv->signal) == 0 ? -1 : 0; } static int pthread_cond_broadcast(pthread_cond_t *cv) { // Implementation with PulseEvent() has race condition, see // http://www.cs.wustl.edu/~schmidt/win32-cv-1.html return PulseEvent(cv->broadcast) == 0 ? -1 : 0; } static int pthread_cond_destroy(pthread_cond_t *cv) { return CloseHandle(cv->signal) && CloseHandle(cv->broadcast) ? 0 : -1; } // For Windows, change all slashes to backslashes in path names. static void change_slashes_to_backslashes(char *path) { int i; for (i = 0; path[i] != '\0'; i++) { if (path[i] == '/') path[i] = '\\'; // i > 0 check is to preserve UNC paths, like \\server\file.txt if (path[i] == '\\' && i > 0) while (path[i + 1] == '\\' || path[i + 1] == '/') (void) memmove(path + i + 1, path + i + 2, strlen(path + i + 1)); } } // Encode 'path' which is assumed UTF-8 string, into UNICODE string. // wbuf and wbuf_len is a target buffer and its length. static void to_unicode(const char *path, wchar_t *wbuf, size_t wbuf_len) { char buf[PATH_MAX], buf2[PATH_MAX], *p; mg_strlcpy(buf, path, sizeof(buf)); change_slashes_to_backslashes(buf); // Point p to the end of the file name p = buf + strlen(buf) - 1; // Trim trailing backslash character while (p > buf && *p == '\\' && p[-1] != ':') { *p-- = '\0'; } // Protect from CGI code disclosure. // This is very nasty hole. 
Windows happily opens files with // some garbage in the end of file name. So fopen("a.cgi ", "r") // actually opens "a.cgi", and does not return an error! if (*p == 0x20 || // No space at the end (*p == 0x2e && p > buf) || // No '.' but allow '.' as full path *p == 0x2b || // No '+' (*p & ~0x7f)) { // And generally no non-ascii chars (void) fprintf(stderr, "Rejecting suspicious path: [%s]", buf); wbuf[0] = L'\0'; } else { // Convert to Unicode and back. If doubly-converted string does not // match the original, something is fishy, reject. memset(wbuf, 0, wbuf_len * sizeof(wchar_t)); MultiByteToWideChar(CP_UTF8, 0, buf, -1, wbuf, (int) wbuf_len); WideCharToMultiByte(CP_UTF8, 0, wbuf, (int) wbuf_len, buf2, sizeof(buf2), NULL, NULL); if (strcmp(buf, buf2) != 0) { wbuf[0] = L'\0'; } } } #if defined(_WIN32_WCE) static time_t time(time_t *ptime) { time_t t; SYSTEMTIME st; FILETIME ft; GetSystemTime(&st); SystemTimeToFileTime(&st, &ft); t = SYS2UNIX_TIME(ft.dwLowDateTime, ft.dwHighDateTime); if (ptime != NULL) { *ptime = t; } return t; } static struct tm *localtime(const time_t *ptime, struct tm *ptm) { int64_t t = ((int64_t) *ptime) * RATE_DIFF + EPOCH_DIFF; FILETIME ft, lft; SYSTEMTIME st; TIME_ZONE_INFORMATION tzinfo; if (ptm == NULL) { return NULL; } * (int64_t *) &ft = t; FileTimeToLocalFileTime(&ft, &lft); FileTimeToSystemTime(&lft, &st); ptm->tm_year = st.wYear - 1900; ptm->tm_mon = st.wMonth - 1; ptm->tm_wday = st.wDayOfWeek; ptm->tm_mday = st.wDay; ptm->tm_hour = st.wHour; ptm->tm_min = st.wMinute; ptm->tm_sec = st.wSecond; ptm->tm_yday = 0; // hope nobody uses this ptm->tm_isdst = GetTimeZoneInformation(&tzinfo) == TIME_ZONE_ID_DAYLIGHT ? 1 : 0; return ptm; } static struct tm *gmtime(const time_t *ptime, struct tm *ptm) { // FIXME(lsm): fix this. 
return localtime(ptime, ptm); } static size_t strftime(char *dst, size_t dst_size, const char *fmt, const struct tm *tm) { (void) snprintf(dst, dst_size, "implement strftime() for WinCE"); return 0; } #endif static int mg_rename(const char* oldname, const char* newname) { wchar_t woldbuf[PATH_MAX]; wchar_t wnewbuf[PATH_MAX]; to_unicode(oldname, woldbuf, ARRAY_SIZE(woldbuf)); to_unicode(newname, wnewbuf, ARRAY_SIZE(wnewbuf)); return MoveFileW(woldbuf, wnewbuf) ? 0 : -1; } static FILE *mg_fopen(const char *path, const char *mode) { wchar_t wbuf[PATH_MAX], wmode[20]; to_unicode(path, wbuf, ARRAY_SIZE(wbuf)); MultiByteToWideChar(CP_UTF8, 0, mode, -1, wmode, ARRAY_SIZE(wmode)); return _wfopen(wbuf, wmode); } static int mg_stat(const char *path, struct mgstat *stp) { int ok = -1; // Error wchar_t wbuf[PATH_MAX]; WIN32_FILE_ATTRIBUTE_DATA info; to_unicode(path, wbuf, ARRAY_SIZE(wbuf)); if (GetFileAttributesExW(wbuf, GetFileExInfoStandard, &info) != 0) { stp->size = MAKEUQUAD(info.nFileSizeLow, info.nFileSizeHigh); stp->mtime = SYS2UNIX_TIME(info.ftLastWriteTime.dwLowDateTime, info.ftLastWriteTime.dwHighDateTime); stp->is_directory = info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY; ok = 0; // Success } return ok; } static int mg_remove(const char *path) { wchar_t wbuf[PATH_MAX]; to_unicode(path, wbuf, ARRAY_SIZE(wbuf)); return DeleteFileW(wbuf) ? 0 : -1; } static int mg_mkdir(const char *path, int mode) { char buf[PATH_MAX]; wchar_t wbuf[PATH_MAX]; mode = 0; // Unused mg_strlcpy(buf, path, sizeof(buf)); change_slashes_to_backslashes(buf); (void) MultiByteToWideChar(CP_UTF8, 0, buf, -1, wbuf, sizeof(wbuf)); return CreateDirectoryW(wbuf, NULL) ? 0 : -1; } // Implementation of POSIX opendir/closedir/readdir for Windows. 
// opendir() replacement for Windows: allocates a DIR, validates that the
// path exists and is a directory, and primes a FindFirstFileW() scan.
// Returns NULL (with last error set) on failure.
static DIR * opendir(const char *name) {
  DIR *dir = NULL;
  wchar_t wpath[PATH_MAX];
  DWORD attrs;

  if (name == NULL) {
    SetLastError(ERROR_BAD_ARGUMENTS);
  } else if ((dir = (DIR *) malloc(sizeof(*dir))) == NULL) {
    SetLastError(ERROR_NOT_ENOUGH_MEMORY);
  } else {
    to_unicode(name, wpath, ARRAY_SIZE(wpath));
    attrs = GetFileAttributesW(wpath);
    if (attrs != 0xFFFFFFFF &&
        ((attrs & FILE_ATTRIBUTE_DIRECTORY) == FILE_ATTRIBUTE_DIRECTORY)) {
      // Append the "match everything" pattern expected by FindFirstFileW
      (void) wcscat(wpath, L"\\*");
      dir->handle = FindFirstFileW(wpath, &dir->info);
      dir->result.d_name[0] = '\0';
    } else {
      free(dir);
      dir = NULL;
    }
  }

  return dir;
}

// closedir() replacement: ends the find scan and frees the DIR.
// Returns 0 on success, -1 on error.
static int closedir(DIR *dir) {
  int result = 0;

  if (dir != NULL) {
    if (dir->handle != INVALID_HANDLE_VALUE)
      result = FindClose(dir->handle) ? 0 : -1;
    free(dir);
  } else {
    result = -1;
    SetLastError(ERROR_BAD_ARGUMENTS);
  }

  return result;
}

// readdir() replacement: converts the current find-result name to UTF-8,
// advances the scan, and returns a pointer into dir->result (or NULL).
struct dirent * readdir(DIR *dir) {
  struct dirent *result = 0;

  if (dir) {
    if (dir->handle != INVALID_HANDLE_VALUE) {
      result = &dir->result;
      (void) WideCharToMultiByte(CP_UTF8, 0, dir->info.cFileName, -1,
                                 result->d_name, sizeof(result->d_name),
                                 NULL, NULL);
      if (!FindNextFileW(dir->handle, &dir->info)) {
        // End of scan: close the handle so the next call returns NULL
        (void) FindClose(dir->handle);
        dir->handle = INVALID_HANDLE_VALUE;
      }
    } else {
      SetLastError(ERROR_FILE_NOT_FOUND);
    }
  } else {
    SetLastError(ERROR_BAD_ARGUMENTS);
  }

  return result;
}

#define set_close_on_exec(fd) // No FD_CLOEXEC on Windows

// Start a detached worker thread. Returns 0 on success, -1 on error.
static int start_thread(struct mg_context *ctx, mg_thread_func_t f, void *p) {
  return _beginthread((void (__cdecl *)(void *)) f, 0, p) == -1L ? -1 : 0;
}

// dlopen() replacement; flags parameter is accepted for parity and ignored.
static HANDLE dlopen(const char *dll_name, int flags) {
  wchar_t wbuf[PATH_MAX];
  flags = 0; // Unused
  to_unicode(dll_name, wbuf, ARRAY_SIZE(wbuf));
  return LoadLibraryW(wbuf);
}

#if !defined(NO_CGI)
#define SIGKILL 0

// kill() replacement: pid is actually a process HANDLE on Windows.
static int kill(pid_t pid, int sig_num) {
  (void) TerminateProcess(pid, sig_num);
  (void) CloseHandle(pid);
  return 0;
}

// Spawn a CGI process with stdin/stdout redirected to the given fds.
// Returns the new process handle cast to pid_t, or (pid_t) -1 on failure.
static pid_t spawn_process(struct mg_connection *conn, const char *prog,
                           char *envblk, char *envp[], int fd_stdin,
                           int fd_stdout, const char *dir) {
  HANDLE me;
  char *p, *interp, cmdline[PATH_MAX], buf[PATH_MAX];
  FILE *fp;
  STARTUPINFOA si = { sizeof(si) };
  PROCESS_INFORMATION pi = { 0 };

  envp = NULL; // Unused

  // TODO(lsm): redirect CGI errors to the error log file
  si.dwFlags = STARTF_USESTDHANDLES | STARTF_USESHOWWINDOW;
  si.wShowWindow = SW_HIDE;

  me = GetCurrentProcess();
  // Duplicate the CRT fds as inheritable handles for the child
  (void) DuplicateHandle(me, (HANDLE) _get_osfhandle(fd_stdin), me,
                         &si.hStdInput, 0, TRUE, DUPLICATE_SAME_ACCESS);
  (void) DuplicateHandle(me, (HANDLE) _get_osfhandle(fd_stdout), me,
                         &si.hStdOutput, 0, TRUE, DUPLICATE_SAME_ACCESS);

  // If CGI file is a script, try to read the interpreter line
  interp = conn->ctx->config[CGI_INTERPRETER];
  if (interp == NULL) {
    buf[2] = '\0';
    mg_snprintf(conn, cmdline, sizeof(cmdline), "%s%c%s", dir, DIRSEP, prog);
    if ((fp = fopen(cmdline, "r")) != NULL) {
      (void) fgets(buf, sizeof(buf), fp);
      if (buf[0] != '#' || buf[1] != '!') {
        // First line does not start with "#!". Do not set interpreter.
        buf[2] = '\0';
      } else {
        // Trim whitespaces in interpreter name
        for (p = &buf[strlen(buf) - 1]; p > buf && isspace(*p); p--) {
          *p = '\0';
        }
      }
      (void) fclose(fp);
    }
    // Interpreter (if any) starts right after the "#!" prefix
    interp = buf + 2;
  }

  (void) mg_snprintf(conn, cmdline, sizeof(cmdline), "%s%s%s%c%s",
                     interp, interp[0] == '\0' ? "" : " ",
                     dir, DIRSEP, prog);

  DEBUG_TRACE(("Running [%s]", cmdline));
  if (CreateProcessA(NULL, cmdline, NULL, NULL, TRUE,
                     CREATE_NEW_PROCESS_GROUP, envblk, dir, &si, &pi) == 0) {
    cry(conn, "%s: CreateProcess(%s): %d", __func__, cmdline, ERRNO);
    pi.hProcess = (pid_t) -1;
  } else {
    (void) close(fd_stdin);
    (void) close(fd_stdout);
  }

  (void) CloseHandle(si.hStdOutput);
  (void) CloseHandle(si.hStdInput);
  (void) CloseHandle(pi.hThread);

  return (pid_t) pi.hProcess;
}
#endif // !NO_CGI

// Put the socket into non-blocking mode. Returns ioctlsocket() result.
static int set_non_blocking_mode(SOCKET sock) {
  unsigned long on = 1;
  return ioctlsocket(sock, FIONBIO, &on);
}

#else

// POSIX stat() wrapper: fills size, mtime, is_directory of struct mgstat.
// Returns 0 on success, -1 on error.
static int mg_stat(const char *path, struct mgstat *stp) {
  struct stat st;
  int ok;

  if (stat(path, &st) == 0) {
    ok = 0;
    stp->size = st.st_size;
    stp->mtime = st.st_mtime;
    stp->is_directory = S_ISDIR(st.st_mode);
  } else {
    ok = -1;
  }

  return ok;
}

// Mark fd so it is not inherited across exec().
static void set_close_on_exec(int fd) {
  (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
}

// Start a detached worker thread. Returns pthread_create() result (0 = OK).
static int start_thread(struct mg_context *ctx, mg_thread_func_t func,
                        void *param) {
  pthread_t thread_id;
  pthread_attr_t attr;
  int retval;

  (void) pthread_attr_init(&attr);
  (void) pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
  // TODO(lsm): figure out why mongoose dies on Linux if next line is enabled
  // (void) pthread_attr_setstacksize(&attr, sizeof(struct mg_connection) * 5);

  if ((retval = pthread_create(&thread_id, &attr, func, param)) != 0) {
    cry(fc(ctx), "%s: %s", __func__, strerror(retval));
  }

  return retval;
}

#ifndef NO_CGI
// Fork and exec a CGI program with stdin/stdout redirected to the given fds.
// Returns the child pid, or -1 on fork failure (after sending a 500 reply).
static pid_t spawn_process(struct mg_connection *conn, const char *prog,
                           char *envblk, char *envp[], int fd_stdin,
                           int fd_stdout, const char *dir) {
  pid_t pid;
  const char *interp;

  envblk = NULL; // Unused

  if ((pid = fork()) == -1) {
    // Parent
    send_http_error(conn, 500, http_500_error, "fork(): %s", strerror(ERRNO));
  } else if (pid == 0) {
    // Child
    if (chdir(dir) != 0) {
      cry(conn, "%s: chdir(%s): %s", __func__, dir, strerror(ERRNO));
    } else if (dup2(fd_stdin, 0) == -1) {
      cry(conn, "%s: dup2(%d, 0): %s", __func__, fd_stdin, strerror(ERRNO));
    } else if (dup2(fd_stdout, 1) == -1) {
      cry(conn, "%s: dup2(%d, 1): %s", __func__, fd_stdout, strerror(ERRNO));
    } else {
      // Send stderr to the same pipe as stdout
      (void) dup2(fd_stdout, 2);
      (void) close(fd_stdin);
      (void) close(fd_stdout);

      // Execute CGI program. No need to lock: new process
      interp = conn->ctx->config[CGI_INTERPRETER];
      if (interp == NULL) {
        (void) execle(prog, prog, NULL, envp);
        cry(conn, "%s: execle(%s): %s", __func__, prog, strerror(ERRNO));
      } else {
        (void) execle(interp, interp, prog, NULL, envp);
        cry(conn, "%s: execle(%s %s): %s", __func__, interp, prog,
            strerror(ERRNO));
      }
    }
    // Only reached if chdir/dup2/exec failed
    exit(EXIT_FAILURE);
  } else {
    // Parent. Close stdio descriptors
    (void) close(fd_stdin);
    (void) close(fd_stdout);
  }

  return pid;
}
#endif // !NO_CGI

// Put the socket into non-blocking mode. Always returns 0.
static int set_non_blocking_mode(SOCKET sock) {
  int flags;

  flags = fcntl(sock, F_GETFL, 0);
  (void) fcntl(sock, F_SETFL, flags | O_NONBLOCK);

  return 0;
}
#endif // _WIN32

// Write data to the IO channel - opened file descriptor, socket or SSL
// descriptor. Return number of bytes written.
static int64_t push(FILE *fp, SOCKET sock, SSL *ssl, const char *buf,
                    int64_t len) {
  int64_t sent;
  int n, k;

  sent = 0;
  while (sent < len) {

    // How many bytes we send in this iteration
    // (clamped to INT_MAX so the int-returning write calls are safe)
    k = len - sent > INT_MAX ? INT_MAX : (int) (len - sent);

    if (ssl != NULL) {
      n = SSL_write(ssl, buf + sent, k);
    } else if (fp != NULL) {
      n = fwrite(buf + sent, 1, (size_t) k, fp);
      if (ferror(fp))
        n = -1;
    } else {
      n = send(sock, buf + sent, (size_t) k, MSG_NOSIGNAL);
    }

    if (n < 0)
      break;

    sent += n;
  }

  return sent;
}

// Read from IO channel - opened file descriptor, socket, or SSL descriptor.
// Return number of bytes read.
static int pull(FILE *fp, SOCKET sock, SSL *ssl, char *buf, int len) {
  int nread;

  if (ssl != NULL) {
    nread = SSL_read(ssl, buf, len);
  } else if (fp != NULL) {
    // Use read() instead of fread(), because if we're reading from the CGI
    // pipe, fread() may block until IO buffer is filled up. We cannot afford
    // to block and must pass all read bytes immediately to the client.
    nread = read(fileno(fp), buf, (size_t) len);
    // NOTE(review): read() bypasses the stdio buffer, so ferror(fp) will not
    // reflect a read() failure here — TODO confirm whether errno was intended
    if (ferror(fp))
      nread = -1;
  } else {
    nread = recv(sock, buf, (size_t) len, 0);
  }

  return nread;
}

// Read up to len bytes of request body into buf, consuming buffered data
// first, then pulling from the socket/SSL. Returns number of bytes read.
int mg_read(struct mg_connection *conn, void *buf, size_t len) {
  int n, buffered_len, nread;
  const char *buffered;

  assert((conn->content_len == -1 && conn->consumed_content == 0) ||
         conn->consumed_content <= conn->content_len);
  DEBUG_TRACE(("%p %zu %lld %lld", buf, len,
               conn->content_len, conn->consumed_content));
  nread = 0;
  if (conn->consumed_content < conn->content_len) {

    // Adjust number of bytes to read.
    int64_t to_read = conn->content_len - conn->consumed_content;
    if (to_read < (int64_t) len) {
      len = (int) to_read;
    }

    // How many bytes of data we have buffered in the request buffer?
    buffered = conn->buf + conn->request_len + conn->consumed_content;
    buffered_len = conn->data_len - conn->request_len;
    assert(buffered_len >= 0);

    // Return buffered data back if we haven't done that yet.
    if (conn->consumed_content < (int64_t) buffered_len) {
      buffered_len -= (int) conn->consumed_content;
      if (len < (size_t) buffered_len) {
        buffered_len = len;
      }
      memcpy(buf, buffered, (size_t)buffered_len);
      len -= buffered_len;
      buf = (char *) buf + buffered_len;
      conn->consumed_content += buffered_len;
      nread = buffered_len;
    }

    // We have returned all buffered data. Read new data from the remote socket.
    while (len > 0) {
      n = pull(NULL, conn->client.sock, conn->ssl, (char *) buf, (int) len);
      if (n <= 0) {
        break;
      }
      buf = (char *) buf + n;
      conn->consumed_content += n;
      nread += n;
      len -= n;
    }
  }
  return nread;
}

// Write len bytes from buf to the client connection.
int mg_write(struct mg_connection *conn, const void *buf, size_t len) {
  return (int) push(NULL, conn->client.sock, conn->ssl,
                    (const char *) buf, (int64_t) len);
}

// printf()-style write to the client connection; output is truncated to
// BUFSIZ bytes by mg_vsnprintf.
int mg_printf(struct mg_connection *conn, const char *fmt, ...)
{
  char buf[BUFSIZ];
  int len;
  va_list ap;

  va_start(ap, fmt);
  len = mg_vsnprintf(conn, buf, sizeof(buf), fmt, ap);
  va_end(ap);

  return mg_write(conn, buf, (size_t)len);
}

// URL-decode input buffer into destination buffer.
// 0-terminate the destination buffer. Return the length of decoded data.
// form-url-encoded data differs from URI encoding in a way that it
// uses '+' as character for space, see RFC 1866 section 8.2.1
// http://ftp.ics.uci.edu/pub/ietf/html/rfc1866.txt
static size_t url_decode(const char *src, size_t src_len, char *dst,
                         size_t dst_len, int is_form_url_encoded) {
  size_t i, j;
  int a, b;
  // Maps lowercased hex digit to its value: 'a'-'W' == 10
#define HEXTOI(x) (isdigit(x) ? x - '0' : x - 'W')

  for (i = j = 0; i < src_len && j < dst_len - 1; i++, j++) {
    if (src[i] == '%' &&
        isxdigit(* (const unsigned char *) (src + i + 1)) &&
        isxdigit(* (const unsigned char *) (src + i + 2))) {
      a = tolower(* (const unsigned char *) (src + i + 1));
      b = tolower(* (const unsigned char *) (src + i + 2));
      dst[j] = (char) ((HEXTOI(a) << 4) | HEXTOI(b));
      i += 2;
    } else if (is_form_url_encoded && src[i] == '+') {
      dst[j] = ' ';
    } else {
      dst[j] = src[i];
    }
  }

  dst[j] = '\0'; // Null-terminate the destination

  return j;
}

// Scan given buffer and fetch the value of the given variable.
// It can be specified in query string, or in the POST data.
// Return NULL if the variable not found, or allocated 0-terminated value.
// It is caller's responsibility to free the returned value.
int mg_get_var(const char *buf, size_t buf_len, const char *name,
               char *dst, size_t dst_len) {
  const char *p, *e, *s;
  size_t name_len, len;

  name_len = strlen(name);
  e = buf + buf_len;
  // NOTE(review): len is size_t; -1 wraps to SIZE_MAX and is converted back
  // to int -1 on return — TODO confirm this is the intended "not found" code
  len = -1;
  dst[0] = '\0';

  // buf is "var1=val1&var2=val2...". Find variable first
  for (p = buf; p != NULL && p + name_len < e; p++) {
    if ((p == buf || p[-1] == '&') && p[name_len] == '=' &&
        !mg_strncasecmp(name, p, name_len)) {

      // Point p to variable value
      p += name_len + 1;

      // Point s to the end of the value
      s = (const char *) memchr(p, '&', (size_t)(e - p));
      if (s == NULL) {
        s = e;
      }
      assert(s >= p);

      // Decode variable into destination buffer
      if ((size_t) (s - p) < dst_len) {
        len = url_decode(p, (size_t)(s - p), dst, dst_len, 1);
      }
      break;
    }
  }

  return len;
}

// Fetch the value of a named cookie from the Cookie request header into dst.
// Returns value length + 1 on success, 0 if there is no Cookie header,
// -1 if the cookie was not found or dst is too small.
int mg_get_cookie(const struct mg_connection *conn, const char *cookie_name,
                  char *dst, size_t dst_size) {
  const char *s, *p, *end;
  int name_len, len = -1;

  dst[0] = '\0';
  if ((s = mg_get_header(conn, "Cookie")) == NULL) {
    return 0;
  }

  name_len = strlen(cookie_name);
  end = s + strlen(s);

  for (; (s = strstr(s, cookie_name)) != NULL; s += name_len)
    if (s[name_len] == '=') {
      s += name_len + 1;
      if ((p = strchr(s, ' ')) == NULL)
        p = end;
      if (p[-1] == ';')
        p--;
      // Strip surrounding double quotes, if present
      if (*s == '"' && p[-1] == '"' && p > s + 1) {
        s++;
        p--;
      }
      if ((size_t) (p - s) < dst_size) {
        len = (p - s) + 1;
        mg_strlcpy(dst, s, (size_t)len);
      }
      break;
    }

  return len;
}

// Map the request URI to a filesystem path, applying rewrite rules and
// CGI PATH_INFO splitting. Returns the mg_stat() result for the final path.
static int convert_uri_to_file_name(struct mg_connection *conn, char *buf,
                                    size_t buf_len, struct mgstat *st) {
  struct vec a, b;
  const char *rewrite, *uri = conn->request_info.uri;
  char *p;
  int match_len, stat_result;

  buf_len--; // This is because memmove() for PATH_INFO may shift part
             // of the path one byte on the right.
  mg_snprintf(conn, buf, buf_len, "%s%s", conn->ctx->config[DOCUMENT_ROOT],
              uri);

  rewrite = conn->ctx->config[REWRITE];
  while ((rewrite = next_option(rewrite, &a, &b)) != NULL) {
    if ((match_len = match_prefix(a.ptr, a.len, uri)) > 0) {
      mg_snprintf(conn, buf, buf_len, "%.*s%s", b.len, b.ptr,
                  uri + match_len);
      break;
    }
  }

#if defined(_WIN32) && !defined(__SYMBIAN32__)
  //change_slashes_to_backslashes(buf);
#endif // _WIN32

  if ((stat_result = mg_stat(buf, st)) != 0) {
    // Support PATH_INFO for CGI scripts: walk back from the end of the
    // path, trying progressively shorter prefixes as a CGI script name
    for (p = buf + strlen(buf); p > buf + 1; p--) {
      if (*p == '/') {
        *p = '\0';
        if (match_prefix(conn->ctx->config[CGI_EXTENSIONS],
                         strlen(conn->ctx->config[CGI_EXTENSIONS]),
                         buf) > 0 &&
            (stat_result = mg_stat(buf, st)) == 0) {
          conn->path_info = p + 1;
          memmove(p + 2, p + 1, strlen(p + 1));
          p[1] = '/';
          break;
        } else {
          *p = '/';
          stat_result = -1;
        }
      }
    }
  }

  return stat_result;
}

// Attach an SSL object to the connection and run the given handshake
// function (SSL_accept or SSL_connect). Returns 1 on success, 0 otherwise.
static int sslize(struct mg_connection *conn, int (*func)(SSL *)) {
  return (conn->ssl = SSL_new(conn->ctx->ssl_ctx)) != NULL &&
    SSL_set_fd(conn->ssl, conn->client.sock) == 1 &&
    func(conn->ssl) == 1;
}

// Check whether full request is buffered. Return:
//   -1  if request is malformed
//    0  if request is not yet fully buffered
//   >0  actual request length, including last \r\n\r\n
static int get_request_len(const char *buf, int buflen) {
  const char *s, *e;
  int len = 0;

  DEBUG_TRACE(("buf: %p, len: %d", buf, buflen));
  for (s = buf, e = s + buflen - 1; len <= 0 && s < e; s++)
    // Control characters are not allowed but >=128 is.
    if (!isprint(* (const unsigned char *) s) && *s != '\r' &&
        *s != '\n' && * (const unsigned char *) s < 128) {
      len = -1;
    } else if (s[0] == '\n' && s[1] == '\n') {
      len = (int) (s - buf) + 2;
    } else if (s[0] == '\n' && &s[1] < e &&
        s[1] == '\r' && s[2] == '\n') {
      len = (int) (s - buf) + 3;
    }

  return len;
}

// Convert month to the month number. Return -1 on error, or month number
static int get_month_index(const char *s) {
  size_t i;

  for (i = 0; i < ARRAY_SIZE(month_names); i++)
    if (!strcmp(s, month_names[i]))
      return (int) i;

  return -1;
}

// Parse UTC date-time string, and return the corresponding time_t value.
// Parse a date-time string in any of the common HTTP formats and return
// the corresponding time_t, or 0 if the string could not be parsed.
static time_t parse_date_string(const char *datetime) {
  // Cumulative day count at the start of each month (non-leap year)
  static const unsigned short days_before_month[] = {
    0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
  };
  char month_str[32];
  int second, minute, hour, day, month, year, leap_days, days;
  time_t result = (time_t) 0;

  if (((sscanf(datetime, "%d/%3s/%d %d:%d:%d",
               &day, month_str, &year, &hour, &minute, &second) == 6) ||
       (sscanf(datetime, "%d %3s %d %d:%d:%d",
               &day, month_str, &year, &hour, &minute, &second) == 6) ||
       (sscanf(datetime, "%*3s, %d %3s %d %d:%d:%d",
               &day, month_str, &year, &hour, &minute, &second) == 6) ||
       (sscanf(datetime, "%d-%3s-%d %d:%d:%d",
               &day, month_str, &year, &hour, &minute, &second) == 6)) &&
      year > 1970 &&
      (month = get_month_index(month_str)) != -1) {
    year -= 1970;
    // NOTE(review): the Gregorian /4 - /100 + /400 rule is applied to the
    // delta since 1970 rather than the calendar year, and no adjustment is
    // made for a leap day in the current year — approximate by design?
    // TODO confirm against upstream mongoose behavior
    leap_days = year / 4 - year / 100 + year / 400;
    days = year * 365 + days_before_month[month] + (day - 1) + leap_days;
    result = days * 24 * 3600 + hour * 3600 + minute * 60 + second;
  }

  return result;
}

// Protect against directory disclosure attack by removing '..',
// excessive '/' and '\' characters
static void remove_double_dots_and_double_slashes(char *s) {
  char *p = s;

  // In-place compaction: p trails s, copying only the characters we keep
  while (*s != '\0') {
    *p++ = *s++;
    if (IS_DIRSEP_CHAR(s[-1])) {
      // Skip all following slashes and backslashes
      while (IS_DIRSEP_CHAR(s[0])) {
        s++;
      }

      // Skip all double-dots
      while (*s == '.' && s[1] == '.') {
        s += 2;
      }
    }
  }
  *p = '\0';
}

// Built-in extension -> MIME type table. Lengths are precomputed so the
// lookup can compare suffixes without calling strlen repeatedly.
static const struct {
  const char *extension;
  size_t ext_len;
  const char *mime_type;
  size_t mime_type_len;
} builtin_mime_types[] = {
  {".html", 5, "text/html",   9},
  {".htm", 4, "text/html",   9},
  {".shtm", 5, "text/html",   9},
  {".shtml", 6, "text/html",   9},
  {".css", 4, "text/css",   8},
  {".js",  3, "application/x-javascript", 24},
  {".ico", 4, "image/x-icon",   12},
  {".gif", 4, "image/gif",   9},
  {".jpg", 4, "image/jpeg",   10},
  {".jpeg", 5, "image/jpeg",   10},
  {".png", 4, "image/png",   9},
  {".svg", 4, "image/svg+xml",  13},
  {".torrent", 8, "application/x-bittorrent", 24},
  {".wav", 4, "audio/x-wav",   11},
  {".mp3", 4, "audio/x-mp3",   11},
  {".mid", 4, "audio/mid",   9},
  {".m3u", 4, "audio/x-mpegurl",  15},
  {".ram", 4, "audio/x-pn-realaudio",  20},
  {".xml", 4, "text/xml",   8},
  {".xslt", 5, "application/xml",  15},
  {".ra",  3, "audio/x-pn-realaudio",  20},
  {".doc", 4, "application/msword",  19},
  {".exe", 4, "application/octet-stream", 24},
  {".zip", 4, "application/x-zip-compressed", 28},
  {".xls", 4, "application/excel",  17},
  {".tgz", 4, "application/x-tar-gz",  20},
  {".tar", 4, "application/x-tar",  17},
  {".gz",  3, "application/x-gunzip",  20},
  {".arj", 4, "application/x-arj-compressed", 28},
  {".rar", 4, "application/x-arj-compressed", 28},
  {".rtf", 4, "application/rtf",  15},
  {".pdf", 4, "application/pdf",  15},
  {".swf", 4, "application/x-shockwave-flash",29},
  {".mpg", 4, "video/mpeg",   10},
  {".mpeg", 5, "video/mpeg",   10},
  {".mp4", 4, "video/mp4", 9},
  {".m4v", 4, "video/x-m4v", 11},
  {".asf", 4, "video/x-ms-asf",  14},
  {".avi", 4, "video/x-msvideo",  15},
  {".bmp", 4, "image/bmp",   9},
  {NULL,  0, NULL,    0}
};

// Look at the "path" extension and figure what mime type it has.
// Store mime type in the vector.
static void get_mime_type(struct mg_context *ctx, const char *path, struct vec *vec) { struct vec ext_vec, mime_vec; const char *list, *ext; size_t i, path_len; path_len = strlen(path); // Scan user-defined mime types first, in case user wants to // override default mime types. list = ctx->config[EXTRA_MIME_TYPES]; while ((list = next_option(list, &ext_vec, &mime_vec)) != NULL) { // ext now points to the path suffix ext = path + path_len - ext_vec.len; if (mg_strncasecmp(ext, ext_vec.ptr, ext_vec.len) == 0) { *vec = mime_vec; return; } } // Now scan built-in mime types for (i = 0; builtin_mime_types[i].extension != NULL; i++) { ext = path + (path_len - builtin_mime_types[i].ext_len); if (path_len > builtin_mime_types[i].ext_len && mg_strcasecmp(ext, builtin_mime_types[i].extension) == 0) { vec->ptr = builtin_mime_types[i].mime_type; vec->len = builtin_mime_types[i].mime_type_len; return; } } // Nothing found. Fall back to "text/plain" vec->ptr = "text/plain"; vec->len = 10; } #ifndef HAVE_MD5 typedef struct MD5Context { uint32_t buf[4]; uint32_t bits[2]; unsigned char in[64]; } MD5_CTX; #if defined(__BYTE_ORDER) && (__BYTE_ORDER == 1234) #define byteReverse(buf, len) // Do nothing #else static void byteReverse(unsigned char *buf, unsigned longs) { uint32_t t; do { t = (uint32_t) ((unsigned) buf[3] << 8 | buf[2]) << 16 | ((unsigned) buf[1] << 8 | buf[0]); *(uint32_t *) buf = t; buf += 4; } while (--longs); } #endif #define F1(x, y, z) (z ^ (x & (y ^ z))) #define F2(x, y, z) F1(z, x, y) #define F3(x, y, z) (x ^ y ^ z) #define F4(x, y, z) (y ^ (x | ~z)) #define MD5STEP(f, w, x, y, z, data, s) \ ( w += f(x, y, z) + data, w = w<>(32-s), w += x ) // Start MD5 accumulation. Set bit count to 0 and buffer to mysterious // initialization constants. 
// Initialize the MD5 context with the RFC 1321 magic constants and a
// zero bit count.
static void MD5Init(MD5_CTX *ctx) {
  ctx->buf[0] = 0x67452301;
  ctx->buf[1] = 0xefcdab89;
  ctx->buf[2] = 0x98badcfe;
  ctx->buf[3] = 0x10325476;

  ctx->bits[0] = 0;
  ctx->bits[1] = 0;
}

// Core MD5 compression: fold one 16-word (64-byte) block "in" into the
// running state "buf". Four rounds of 16 steps each (F1..F4).
static void MD5Transform(uint32_t buf[4], uint32_t const in[16]) {
  register uint32_t a, b, c, d;

  a = buf[0];
  b = buf[1];
  c = buf[2];
  d = buf[3];

  MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
  MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
  MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
  MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
  MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
  MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
  MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
  MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
  MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
  MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
  MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
  MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
  MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
  MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
  MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
  MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);

  MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
  MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
  MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
  MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
  MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
  MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
  MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
  MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
  MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
  MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
  MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
  MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
  MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
  MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
  MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
  MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);

  MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
  MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
  MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
  MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
  MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
  MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
  MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
  MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
  MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
  MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
  MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
  MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
  MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
  MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
  MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
  MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);

  MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
  MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
  MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
  MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
  MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
  MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
  MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
  MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
  MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
  MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
  MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
  MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
  MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
  MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
  MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
  MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);

  buf[0] += a;
  buf[1] += b;
  buf[2] += c;
  buf[3] += d;
}

// Feed len bytes into the hash, buffering partial 64-byte blocks in ctx->in
// and transforming each complete block.
static void MD5Update(MD5_CTX *ctx, unsigned char const *buf, unsigned len) {
  uint32_t t;

  // Update bit count (128-bit counter split across bits[0]/bits[1])
  t = ctx->bits[0];
  if ((ctx->bits[0] = t + ((uint32_t) len << 3)) < t)
    ctx->bits[1]++;  // carry from the low word
  ctx->bits[1] += len >> 29;

  t = (t >> 3) & 0x3f; // bytes already buffered in ctx->in

  // Fill a previously started partial block first, if any
  if (t) {
    unsigned char *p = (unsigned char *) ctx->in + t;

    t = 64 - t;
    if (len < t) {
      memcpy(p, buf, len);
      return;
    }
    memcpy(p, buf, t);
    byteReverse(ctx->in, 16);
    MD5Transform(ctx->buf, (uint32_t *) ctx->in);
    buf += t;
    len -= t;
  }

  // Process full 64-byte blocks directly
  while (len >= 64) {
    memcpy(ctx->in, buf, 64);
    byteReverse(ctx->in, 16);
    MD5Transform(ctx->buf, (uint32_t *) ctx->in);
    buf += 64;
    len -= 64;
  }

  // Buffer the remaining bytes for the next call
  memcpy(ctx->in, buf, len);
}

// Apply RFC 1321 padding, append the bit count, run the final transform,
// and write the 16-byte digest. The context is wiped afterwards.
static void MD5Final(unsigned char digest[16], MD5_CTX *ctx) {
  unsigned count;
  unsigned char *p;

  count = (ctx->bits[0] >> 3) & 0x3F; // bytes buffered mod 64

  // Pad with 0x80 then zeros up to 56 bytes (mod 64)
  p = ctx->in + count;
  *p++ = 0x80;

  count = 64 - 1 - count;

  if (count < 8) {
    // Not enough room for the length: pad out this block and start another
    memset(p, 0, count);
    byteReverse(ctx->in, 16);
    MD5Transform(ctx->buf, (uint32_t *) ctx->in);
    memset(ctx->in, 0, 56);
  } else {
    memset(p, 0, count - 8);
  }
  byteReverse(ctx->in, 14);

  // Append the original message length in bits
  ((uint32_t *) ctx->in)[14] = ctx->bits[0];
  ((uint32_t *) ctx->in)[15] = ctx->bits[1];

  MD5Transform(ctx->buf, (uint32_t *) ctx->in);
  byteReverse((unsigned char *) ctx->buf, 4);
  memcpy(digest, ctx->buf, 16);
  memset((char *) ctx, 0, sizeof(*ctx)); // don't leave key material around
}
#endif // !HAVE_MD5

// Stringify binary data. Output buffer must be twice as big as input,
// because each byte takes 2 bytes in string representation
static void bin2str(char *to, const unsigned char *p, size_t len) {
  static const char *hex = "0123456789abcdef";

  for (; len--; p++) {
    *to++ = hex[p[0] >> 4];
    *to++ = hex[p[0] & 0x0f];
  }
  *to = '\0';
}

// Return stringified MD5 hash for list of vectors. Buffer must be 33 bytes.
// The variadic argument list is a NULL-terminated sequence of C strings.
void mg_md5(char *buf, ...)
{
  unsigned char hash[16];
  const char *p;
  va_list ap;
  MD5_CTX ctx;

  MD5Init(&ctx);

  va_start(ap, buf);
  while ((p = va_arg(ap, const char *)) != NULL) {
    MD5Update(&ctx, (const unsigned char *) p, (unsigned) strlen(p));
  }
  va_end(ap);

  MD5Final(hash, &ctx);
  bin2str(buf, hash, sizeof(hash));
}

// Return stringified MD5 hash for a file. On open failure, buf is set to
// the empty string. NOTE(review): a mid-file fread() error is not
// distinguished from EOF — TODO confirm this is acceptable to callers.
void mg_md5_file(char *buf, const char* file) {
  unsigned char hash[16];
  MD5_CTX ctx;
  MD5Init(&ctx);
  FILE* f = fopen(file, "rb");
  if (f == NULL) {
    buf[0] = '\0';
    return;
  }
  char readbuf[4096];
  while (1) {
    unsigned readlen = fread(readbuf, 1, 4096, f);
    MD5Update(&ctx, (const unsigned char *) readbuf, readlen);
    if (readlen == 0) break;
  }
  fclose(f);
  MD5Final(hash, &ctx);
  bin2str(buf, hash, sizeof(hash));
}

// Check the user's password, return 1 if OK
static int check_password(const char *method, const char *ha1,
                          const char *uri, const char *nonce, const char *nc,
                          const char *cnonce, const char *qop,
                          const char *response) {
  char ha2[32 + 1], expected_response[32 + 1];

  // Some of the parameters may be NULL
  if (method == NULL || nonce == NULL || nc == NULL || cnonce == NULL ||
      qop == NULL || response == NULL) {
    return 0;
  }

  // NOTE(lsm): due to a bug in MSIE, we do not compare the URI
  // TODO(lsm): check for authentication timeout
  if (// strcmp(dig->uri, c->ouri) != 0 ||
      strlen(response) != 32
      // || now - strtoul(dig->nonce, NULL, 10) > 3600
      ) {
    return 0;
  }

  // Compute HA2 = MD5(method:uri), then the RFC 2617 "auth" response
  mg_md5(ha2, method, ":", uri, NULL);
  mg_md5(expected_response, ha1, ":", nonce, ":", nc,
      ":", cnonce, ":", qop, ":", ha2, NULL);

  return mg_strcasecmp(response, expected_response) == 0;
}

// Use the global passwords file, if specified by auth_gpass option,
// or search for .htpasswd in the requested directory.
// Open the passwords file that governs access to "path": the global file
// (GLOBAL_PASSWORDS_FILE) if configured, otherwise a .htpasswd next to the
// requested file or inside the requested directory. Returns NULL if none.
static FILE *open_auth_file(struct mg_connection *conn, const char *path) {
  struct mg_context *ctx = conn->ctx;
  char name[PATH_MAX];
  const char *p, *e;
  struct mgstat st;
  FILE *fp;

  if (ctx->config[GLOBAL_PASSWORDS_FILE] != NULL) {
    // Use global passwords file
    fp = mg_fopen(ctx->config[GLOBAL_PASSWORDS_FILE], "r");
    if (fp == NULL)
      cry(fc(ctx), "fopen(%s): %s",
          ctx->config[GLOBAL_PASSWORDS_FILE], strerror(ERRNO));
  } else if (!mg_stat(path, &st) && st.is_directory) {
    // Requested path is a directory: look for PASSWORDS_FILE_NAME inside it
    (void) mg_snprintf(conn, name, sizeof(name), "%s%c%s",
        path, DIRSEP, PASSWORDS_FILE_NAME);
    fp = mg_fopen(name, "r");
  } else {
    // Try to find .htpasswd in requested directory (strip the file name).
    for (p = path, e = p + strlen(p) - 1; e > p; e--)
      if (IS_DIRSEP_CHAR(*e))
        break;
    (void) mg_snprintf(conn, name, sizeof(name), "%.*s%c%s",
        (int) (e - p), p, DIRSEP, PASSWORDS_FILE_NAME);
    fp = mg_fopen(name, "r");
  }

  return fp;
}

// Parsed Authorization header. All pointers reference slices of the
// caller-provided scratch buffer passed to parse_auth_header().
struct ah {
  char *user, *uri, *cnonce, *response, *qop, *nc, *nonce;
};

// Parse a "Digest" Authorization request header into *ah, using buf as
// modifiable scratch storage. Also sets conn->request_info.remote_user.
// Returns 1 on success, 0 if the header is absent, not Digest, or has no
// username.
static int parse_auth_header(struct mg_connection *conn, char *buf,
                             size_t buf_size, struct ah *ah) {
  char *name, *value, *s;
  const char *auth_header;

  if ((auth_header = mg_get_header(conn, "Authorization")) == NULL ||
      mg_strncasecmp(auth_header, "Digest ", 7) != 0) {
    return 0;
  }

  // Make modifiable copy of the auth header
  (void) mg_strlcpy(buf, auth_header + 7, buf_size);

  s = buf;
  (void) memset(ah, 0, sizeof(*ah));

  // Parse authorization header
  for (;;) {
    // Gobble initial spaces
    while (isspace(* (unsigned char *) s)) {
      s++;
    }
    name = skip_quoted(&s, "=", " ", 0);
    // Value is either quote-delimited, or ends at first comma or space.
    if (s[0] == '\"') {
      s++;
      value = skip_quoted(&s, "\"", " ", '\\');
      if (s[0] == ',') {
        s++;
      }
    } else {
      value = skip_quoted(&s, ", ", " ", 0); // IE uses commas, FF uses spaces
    }
    if (*name == '\0') {
      break;
    }

    if (!strcmp(name, "username")) {
      ah->user = value;
    } else if (!strcmp(name, "cnonce")) {
      ah->cnonce = value;
    } else if (!strcmp(name, "response")) {
      ah->response = value;
    } else if (!strcmp(name, "uri")) {
      ah->uri = value;
    } else if (!strcmp(name, "qop")) {
      ah->qop = value;
    } else if (!strcmp(name, "nc")) {
      ah->nc = value;
    } else if (!strcmp(name, "nonce")) {
      ah->nonce = value;
    }
  }

  // CGI needs it as REMOTE_USER
  if (ah->user != NULL) {
    conn->request_info.remote_user = mg_strdup(ah->user);
  } else {
    return 0;
  }

  return 1;
}

// Authorize against the opened passwords file. Return 1 if authorized.
// Password file lines have the form "user:domain:ha1".
static int authorize(struct mg_connection *conn, FILE *fp) {
  struct ah ah;
  char line[256], f_user[256], ha1[256], f_domain[256], buf[BUFSIZ];

  if (!parse_auth_header(conn, buf, sizeof(buf), &ah)) {
    return 0;
  }

  // Loop over passwords file
  while (fgets(line, sizeof(line), fp) != NULL) {
    if (sscanf(line, "%[^:]:%[^:]:%s", f_user, f_domain, ha1) != 3) {
      continue; // malformed line, skip it
    }

    if (!strcmp(ah.user, f_user) &&
        !strcmp(conn->ctx->config[AUTHENTICATION_DOMAIN], f_domain))
      return check_password(
            conn->request_info.request_method,
            ha1, ah.uri, ah.nonce, ah.nc, ah.cnonce, ah.qop,
            ah.response);
  }

  return 0;
}

// Return 1 if request is authorised, 0 otherwise.
static int check_authorization(struct mg_connection *conn, const char *path) { FILE *fp; char fname[PATH_MAX]; struct vec uri_vec, filename_vec; const char *list; int authorized; fp = NULL; authorized = 1; list = conn->ctx->config[PROTECT_URI]; while ((list = next_option(list, &uri_vec, &filename_vec)) != NULL) { if (!memcmp(conn->request_info.uri, uri_vec.ptr, uri_vec.len)) { (void) mg_snprintf(conn, fname, sizeof(fname), "%.*s", filename_vec.len, filename_vec.ptr); if ((fp = mg_fopen(fname, "r")) == NULL) { cry(conn, "%s: cannot open %s: %s", __func__, fname, strerror(errno)); } break; } } if (fp == NULL) { fp = open_auth_file(conn, path); } if (fp != NULL) { authorized = authorize(conn, fp); (void) fclose(fp); } return authorized; } static void send_authorization_request(struct mg_connection *conn) { conn->request_info.status_code = 401; (void) mg_printf(conn, "HTTP/1.1 401 Unauthorized\r\n" "Content-Length: 0\r\n" "WWW-Authenticate: Digest qop=\"auth\", " "realm=\"%s\", nonce=\"%lu\"\r\n\r\n", conn->ctx->config[AUTHENTICATION_DOMAIN], (unsigned long) time(NULL)); } static int is_authorized_for_put(struct mg_connection *conn) { FILE *fp; int ret = 0; fp = conn->ctx->config[PUT_DELETE_PASSWORDS_FILE] == NULL ? NULL : mg_fopen(conn->ctx->config[PUT_DELETE_PASSWORDS_FILE], "r"); if (fp != NULL) { ret = authorize(conn, fp); (void) fclose(fp); } return ret; } int mg_modify_passwords_file(const char *fname, const char *domain, const char *user, const char *pass) { int found; char line[512], u[512], d[512], ha1[33], tmp[PATH_MAX]; FILE *fp, *fp2; found = 0; fp = fp2 = NULL; // Regard empty password as no password - remove user record. 
if (pass != NULL && pass[0] == '\0') { pass = NULL; } (void) snprintf(tmp, sizeof(tmp), "%s.tmp", fname); // Create the file if does not exist if ((fp = mg_fopen(fname, "a+")) != NULL) { (void) fclose(fp); } // Open the given file and temporary file if ((fp = mg_fopen(fname, "r")) == NULL) { return 0; } else if ((fp2 = mg_fopen(tmp, "w+")) == NULL) { fclose(fp); return 0; } // Copy the stuff to temporary file while (fgets(line, sizeof(line), fp) != NULL) { if (sscanf(line, "%[^:]:%[^:]:%*s", u, d) != 2) { continue; } if (!strcmp(u, user) && !strcmp(d, domain)) { found++; if (pass != NULL) { mg_md5(ha1, user, ":", domain, ":", pass, NULL); fprintf(fp2, "%s:%s:%s\n", user, domain, ha1); } } else { (void) fprintf(fp2, "%s", line); } } // If new user, just add it if (!found && pass != NULL) { mg_md5(ha1, user, ":", domain, ":", pass, NULL); (void) fprintf(fp2, "%s:%s:%s\n", user, domain, ha1); } // Close files (void) fclose(fp); (void) fclose(fp2); // Put the temp file in place of real file (void) mg_remove(fname); (void) mg_rename(tmp, fname); return 1; } struct de { struct mg_connection *conn; char *file_name; struct mgstat st; }; static void url_encode(const char *src, char *dst, size_t dst_len) { static const char *dont_escape = "._-$,;~()"; static const char *hex = "0123456789abcdef"; const char *end = dst + dst_len - 1; for (; *src != '\0' && dst < end; src++, dst++) { if (isalnum(*(const unsigned char *) src) || strchr(dont_escape, * (const unsigned char *) src) != NULL) { *dst = *src; } else if (dst + 2 < end) { dst[0] = '%'; dst[1] = hex[(* (const unsigned char *) src) >> 4]; dst[2] = hex[(* (const unsigned char *) src) & 0xf]; dst += 2; } } *dst = '\0'; } static void print_dir_entry(struct de *de) { char size[64], mod[64], href[PATH_MAX]; if (de->st.is_directory) { (void) mg_snprintf(de->conn, size, sizeof(size), "%s", "[DIRECTORY]"); } else { // We use (signed) cast below because MSVC 6 compiler cannot // convert unsigned __int64 to double. Sigh. 
if (de->st.size < 1024) { (void) mg_snprintf(de->conn, size, sizeof(size), "%lu", (unsigned long) de->st.size); } else if (de->st.size < 1024 * 1024) { (void) mg_snprintf(de->conn, size, sizeof(size), "%.1fk", (double) de->st.size / 1024.0); } else if (de->st.size < 1024 * 1024 * 1024) { (void) mg_snprintf(de->conn, size, sizeof(size), "%.1fM", (double) de->st.size / 1048576); } else { (void) mg_snprintf(de->conn, size, sizeof(size), "%.1fG", (double) de->st.size / 1073741824); } } (void) strftime(mod, sizeof(mod), "%d-%b-%Y %H:%M", localtime(&de->st.mtime)); url_encode(de->file_name, href, sizeof(href)); de->conn->num_bytes_sent += mg_printf(de->conn, "%s%s" " %s  %s\n", de->conn->request_info.uri, href, de->st.is_directory ? "/" : "", de->file_name, de->st.is_directory ? "/" : "", mod, size); } // This function is called from send_directory() and used for // sorting directory entries by size, or name, or modification time. // On windows, __cdecl specification is needed in case if project is built // with __stdcall convention. qsort always requires __cdels callback. static int WINCDECL compare_dir_entries(const void *p1, const void *p2) { const struct de *a = (const struct de *) p1, *b = (const struct de *) p2; const char *query_string = a->conn->request_info.query_string; int cmp_result = 0; if (query_string == NULL) { query_string = "na"; } if (a->st.is_directory && !b->st.is_directory) { return -1; // Always put directories on top } else if (!a->st.is_directory && b->st.is_directory) { return 1; // Always put directories on top } else if (*query_string == 'n') { cmp_result = strcmp(a->file_name, b->file_name); } else if (*query_string == 's') { cmp_result = a->st.size == b->st.size ? 0 : a->st.size > b->st.size ? 1 : -1; } else if (*query_string == 'd') { cmp_result = a->st.mtime == b->st.mtime ? 0 : a->st.mtime > b->st.mtime ? 1 : -1; } return query_string[1] == 'd' ? 
-cmp_result : cmp_result; } static int scan_directory(struct mg_connection *conn, const char *dir, void *data, void (*cb)(struct de *, void *)) { char path[PATH_MAX]; struct dirent *dp; DIR *dirp; struct de de; if ((dirp = opendir(dir)) == NULL) { return 0; } else { de.conn = conn; while ((dp = readdir(dirp)) != NULL) { // Do not show current dir and passwords file if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..") || !strcmp(dp->d_name, PASSWORDS_FILE_NAME)) continue; mg_snprintf(conn, path, sizeof(path), "%s%c%s", dir, DIRSEP, dp->d_name); // If we don't memset stat structure to zero, mtime will have // garbage and strftime() will segfault later on in // print_dir_entry(). memset is required only if mg_stat() // fails. For more details, see // http://code.google.com/p/mongoose/issues/detail?id=79 if (mg_stat(path, &de.st) != 0) { memset(&de.st, 0, sizeof(de.st)); } de.file_name = dp->d_name; cb(&de, data); } (void) closedir(dirp); } return 1; } struct dir_scan_data { struct de *entries; int num_entries; int arr_size; }; static void dir_scan_callback(struct de *de, void *data) { struct dir_scan_data *dsd = (struct dir_scan_data *) data; if (dsd->entries == NULL || dsd->num_entries >= dsd->arr_size) { dsd->arr_size *= 2; dsd->entries = (struct de *) realloc(dsd->entries, dsd->arr_size * sizeof(dsd->entries[0])); } if (dsd->entries == NULL) { // TODO(lsm): propagate an error to the caller dsd->num_entries = 0; } else { dsd->entries[dsd->num_entries].file_name = mg_strdup(de->file_name); dsd->entries[dsd->num_entries].st = de->st; dsd->entries[dsd->num_entries].conn = de->conn; dsd->num_entries++; } } static void handle_directory_request(struct mg_connection *conn, const char *dir) { int i, sort_direction; struct dir_scan_data data = { NULL, 0, 128 }; if (!scan_directory(conn, dir, &data, dir_scan_callback)) { send_http_error(conn, 500, "Cannot open directory", "Error: opendir(%s): %s", dir, strerror(ERRNO)); return; } sort_direction = 
conn->request_info.query_string != NULL && conn->request_info.query_string[1] == 'd' ? 'a' : 'd'; conn->must_close = 1; mg_printf(conn, "%s", "HTTP/1.1 200 OK\r\n" "Connection: close\r\n" "Content-Type: text/html; charset=utf-8\r\n\r\n"); conn->num_bytes_sent += mg_printf(conn, "Index of %s" "" "

Index of %s

"
      ""
      ""
      ""
      "",
      conn->request_info.uri, conn->request_info.uri,
      sort_direction, sort_direction, sort_direction);

  // Print first entry - link to a parent directory
  conn->num_bytes_sent += mg_printf(conn,
      ""
      "\n",
      conn->request_info.uri, "..", "Parent directory", "-", "-");

  // Sort and print directory entries
  qsort(data.entries, (size_t) data.num_entries, sizeof(data.entries[0]),
        compare_dir_entries);
  for (i = 0; i < data.num_entries; i++) {
    print_dir_entry(&data.entries[i]);
    free(data.entries[i].file_name);
  }
  free(data.entries);

  conn->num_bytes_sent += mg_printf(conn, "%s", "
NameModifiedSize

%s %s  %s
"); conn->request_info.status_code = 200; } // Send len bytes from the opened file to the client. static void send_file_data(struct mg_connection *conn, FILE *fp, int64_t len) { char buf[BUFSIZ]; int to_read, num_read, num_written; while (len > 0) { // Calculate how much to read from the file in the buffer to_read = sizeof(buf); if ((int64_t) to_read > len) to_read = (int) len; // Read from file, exit the loop on error if ((num_read = fread(buf, 1, (size_t)to_read, fp)) == 0) break; // Send read bytes to the client, exit the loop on error if ((num_written = mg_write(conn, buf, (size_t)num_read)) != num_read) break; // Both read and were successful, adjust counters conn->num_bytes_sent += num_written; len -= num_written; } } static int parse_range_header(const char *header, int64_t *a, int64_t *b) { return sscanf(header, "bytes=%" INT64_FMT "-%" INT64_FMT, a, b); } static void gmt_time_string(char *buf, size_t buf_len, time_t *t) { strftime(buf, buf_len, "%a, %d %b %Y %H:%M:%S GMT", gmtime(t)); } static void handle_file_request(struct mg_connection *conn, const char *path, struct mgstat *stp) { char date[64], lm[64], etag[64], range[64]; const char *msg = "OK", *hdr; time_t curtime = time(NULL); int64_t cl, r1, r2; struct vec mime_vec; FILE *fp; int n; get_mime_type(conn->ctx, path, &mime_vec); cl = stp->size; conn->request_info.status_code = 200; range[0] = '\0'; if ((fp = mg_fopen(path, "rb")) == NULL) { send_http_error(conn, 500, http_500_error, "fopen(%s): %s", path, strerror(ERRNO)); return; } set_close_on_exec(fileno(fp)); // If Range: header specified, act accordingly r1 = r2 = 0; hdr = mg_get_header(conn, "Range"); if (hdr != NULL && (n = parse_range_header(hdr, &r1, &r2)) > 0) { conn->request_info.status_code = 206; (void) fseeko(fp, (off_t) r1, SEEK_SET); cl = n == 2 ? 
r2 - r1 + 1: cl - r1; (void) mg_snprintf(conn, range, sizeof(range), "Content-Range: bytes " "%" INT64_FMT "-%" INT64_FMT "/%" INT64_FMT "\r\n", r1, r1 + cl - 1, stp->size); msg = "Partial Content"; } // Prepare Etag, Date, Last-Modified headers. Must be in UTC, according to // http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3 gmt_time_string(date, sizeof(date), &curtime); gmt_time_string(lm, sizeof(lm), &stp->mtime); (void) mg_snprintf(conn, etag, sizeof(etag), "%lx.%lx", (unsigned long) stp->mtime, (unsigned long) stp->size); (void) mg_printf(conn, "HTTP/1.1 %d %s\r\n" "Date: %s\r\n" "Last-Modified: %s\r\n" "Etag: \"%s\"\r\n" "Content-Type: %.*s\r\n" "Content-Length: %" INT64_FMT "\r\n" "Connection: %s\r\n" "Accept-Ranges: bytes\r\n" "%s\r\n", conn->request_info.status_code, msg, date, lm, etag, (int) mime_vec.len, mime_vec.ptr, cl, suggest_connection_header(conn), range); if (strcmp(conn->request_info.request_method, "HEAD") != 0) { send_file_data(conn, fp, cl); } (void) fclose(fp); } void mg_send_file(struct mg_connection *conn, const char *path) { struct mgstat st; if (mg_stat(path, &st) == 0) { handle_file_request(conn, path, &st); } else { send_http_error(conn, 404, "Not Found", "%s", "File not found"); } } // Parse HTTP headers from the given buffer, advance buffer to the point // where parsing stopped. 
// Parse HTTP headers from the given buffer, advance buffer to the point
// where parsing stopped. Header names/values point into *buf (parsed in
// place). Parsing stops at the first empty header name, or when
// ri->http_headers is full; ri->num_headers counts the parsed entries.
static void parse_http_headers(char **buf, struct mg_request_info *ri) {
  int i;

  for (i = 0; i < (int) ARRAY_SIZE(ri->http_headers); i++) {
    ri->http_headers[i].name = skip_quoted(buf, ":", " ", 0);
    ri->http_headers[i].value = skip(buf, "\r\n");
    if (ri->http_headers[i].name[0] == '\0')
      break;
    ri->num_headers = i + 1;
  }
}

// Return non-zero if 'method' is one of the HTTP methods this server
// is willing to handle.
static int is_valid_http_method(const char *method) {
  return !strcmp(method, "GET") ||
    !strcmp(method, "POST") ||
    !strcmp(method, "HEAD") ||
    !strcmp(method, "CONNECT") ||
    !strcmp(method, "PUT") ||
    !strcmp(method, "DELETE") ||
    !strcmp(method, "OPTIONS") ||
    !strcmp(method, "PROPFIND");
}

// Parse HTTP request, fill in mg_request_info structure.
// 'buf' is modified in place; ri fields point into it.
// Returns 1 on a well-formed request line, 0 otherwise.
static int parse_http_request(char *buf, struct mg_request_info *ri) {
  int status = 0;

  // RFC says that all initial whitespaces should be ignored
  while (*buf != '\0' && isspace(* (unsigned char *) buf)) {
    buf++;
  }

  // Request line: METHOD SP URI SP HTTP/x.y CRLF
  ri->request_method = skip(&buf, " ");
  ri->uri = skip(&buf, " ");
  ri->http_version = skip(&buf, "\r\n");

  if (is_valid_http_method(ri->request_method) &&
      strncmp(ri->http_version, "HTTP/", 5) == 0) {
    ri->http_version += 5;   // Skip "HTTP/"
    parse_http_headers(&buf, ri);
    status = 1;
  }

  return status;
}

// Keep reading the input (either opened file descriptor fd, or socket sock,
// or SSL descriptor ssl) into buffer buf, until \r\n\r\n appears in the
// buffer (which marks the end of HTTP request). Buffer buf may already
// have some data. The length of the data is stored in nread.
// Upon every read operation, increase nread by the number of bytes read.
// Returns the request length (offset just past \r\n\r\n), or 0 if the
// terminator was not found before the buffer filled or the input ended.
static int read_request(FILE *fp, SOCKET sock, SSL *ssl, char *buf,
                        int bufsiz, int *nread) {
  int n, request_len;

  request_len = 0;
  while (*nread < bufsiz && request_len == 0) {
    n = pull(fp, sock, ssl, buf + *nread, bufsiz - *nread);
    if (n <= 0) {
      break;  // EOF or read error: give up
    } else {
      *nread += n;
      request_len = get_request_len(buf, *nread);
    }
  }

  return request_len;
}

// For given directory path, substitute it to valid index file.
// Return 1 if an index file has been found, 0 if not found.
// If the file is found, its stats are returned in stp, and 'path'
// is rewritten to the full path of the index file; otherwise 'path'
// is restored to the directory path (trailing separators stripped).
static int substitute_index_file(struct mg_connection *conn, char *path,
                                 size_t path_len, struct mgstat *stp) {
  const char *list = conn->ctx->config[INDEX_FILES];
  struct mgstat st;
  struct vec filename_vec;
  size_t n = strlen(path);
  int found = 0;

  // The 'path' given to us points to the directory. Remove all trailing
  // directory separator characters from the end of the path, and
  // then append single directory separator character.
  while (n > 0 && IS_DIRSEP_CHAR(path[n - 1])) {
    n--;
  }
  path[n] = DIRSEP;

  // Traverse index files list. For each entry, append it to the given
  // path and see if the file exists. If it exists, break the loop
  while ((list = next_option(list, &filename_vec, NULL)) != NULL) {

    // Ignore too long entries that may overflow path buffer
    if (filename_vec.len > path_len - (n + 2))
      continue;

    // Prepare full path to the index file
    (void) mg_strlcpy(path + n + 1, filename_vec.ptr, filename_vec.len + 1);

    // Does it exist?
    if (mg_stat(path, &st) == 0) {
      // Yes it does, break the loop
      *stp = st;
      found = 1;
      break;
    }
  }

  // If no index file exists, restore directory path
  if (!found) {
    path[n] = '\0';
  }

  return found;
}

// Return True if we should reply 304 Not Modified.
static int is_not_modified(const struct mg_connection *conn, const struct mgstat *stp) { const char *ims = mg_get_header(conn, "If-Modified-Since"); return ims != NULL && stp->mtime <= parse_date_string(ims); } static int forward_body_data(struct mg_connection *conn, FILE *fp, SOCKET sock, SSL *ssl) { const char *expect, *buffered; char buf[BUFSIZ]; int to_read, nread, buffered_len, success = 0; expect = mg_get_header(conn, "Expect"); assert(fp != NULL); if (conn->content_len == -1) { send_http_error(conn, 411, "Length Required", ""); } else if (expect != NULL && mg_strcasecmp(expect, "100-continue")) { send_http_error(conn, 417, "Expectation Failed", ""); } else { if (expect != NULL) { (void) mg_printf(conn, "%s", "HTTP/1.1 100 Continue\r\n\r\n"); } buffered = conn->buf + conn->request_len; buffered_len = conn->data_len - conn->request_len; assert(buffered_len >= 0); assert(conn->consumed_content == 0); if (buffered_len > 0) { if ((int64_t) buffered_len > conn->content_len) { buffered_len = (int) conn->content_len; } push(fp, sock, ssl, buffered, (int64_t) buffered_len); conn->consumed_content += buffered_len; } while (conn->consumed_content < conn->content_len) { to_read = sizeof(buf); if ((int64_t) to_read > conn->content_len - conn->consumed_content) { to_read = (int) (conn->content_len - conn->consumed_content); } nread = pull(NULL, conn->client.sock, conn->ssl, buf, to_read); if (nread <= 0 || push(fp, sock, ssl, buf, nread) != nread) { break; } conn->consumed_content += nread; } if (conn->consumed_content == conn->content_len) { success = 1; } // Each error code path in this function must send an error if (!success) { send_http_error(conn, 577, http_500_error, ""); } } return success; } #if !defined(NO_CGI) // This structure helps to create an environment for the spawned CGI program. // Environment is an array of "VARIABLE=VALUE\0" ASCIIZ strings, // last element must be NULL. 
// However, on Windows there is a requirement that all these VARIABLE=VALUE\0
// strings must reside in a contiguous buffer. The end of the buffer is
// marked by two '\0' characters.
// We satisfy both worlds: we create an envp array (which is vars), all
// entries are actually pointers inside buf.
struct cgi_env_block {
  struct mg_connection *conn;       // Connection the CGI request belongs to
  char buf[CGI_ENVIRONMENT_SIZE];   // Environment buffer
  int len;                          // Space taken
  char *vars[MAX_CGI_ENVIR_VARS];   // char **envp
  int nvars;                        // Number of variables
};

// Append VARIABLE=VALUE\0 string to the buffer, and add a respective
// pointer into the vars array. Silently drops the entry when the buffer
// or the vars array would overflow. Returns a pointer to where the entry
// was (or would have been) written.
static char *addenv(struct cgi_env_block *block, const char *fmt, ...) {
  int n, space;
  char *added;
  va_list ap;

  // Calculate how much space is left in the buffer
  // (reserve 2 bytes for the double-'\0' block terminator).
  space = sizeof(block->buf) - block->len - 2;
  assert(space >= 0);

  // Make a pointer to the free space int the buffer
  added = block->buf + block->len;

  // Copy VARIABLE=VALUE\0 string into the free space
  va_start(ap, fmt);
  n = mg_vsnprintf(block->conn, added, (size_t) space, fmt, ap);
  va_end(ap);

  // Make sure we do not overflow buffer and the envp array
  if (n > 0 && n < space &&
      block->nvars < (int) ARRAY_SIZE(block->vars) - 2) {
    // Append a pointer to the added string into the envp array
    block->vars[block->nvars++] = block->buf + block->len;
    // Bump up used length counter. Include \0 terminator
    block->len += n + 1;
  }

  return added;
}

// Fill 'blk' with the full CGI/1.1 environment (SERVER_*, REQUEST_*,
// HTTP_* headers, user-configured extras) for running 'prog'.
static void prepare_cgi_environment(struct mg_connection *conn,
                                    const char *prog,
                                    struct cgi_env_block *blk) {
  const char *s, *slash;
  struct vec var_vec;
  char *p, src_addr[20];
  int i;

  blk->len = blk->nvars = 0;
  blk->conn = conn;
  sockaddr_to_string(src_addr, sizeof(src_addr), &conn->client.rsa);

  addenv(blk, "SERVER_NAME=%s", conn->ctx->config[AUTHENTICATION_DOMAIN]);
  addenv(blk, "SERVER_ROOT=%s", conn->ctx->config[DOCUMENT_ROOT]);
  addenv(blk, "DOCUMENT_ROOT=%s", conn->ctx->config[DOCUMENT_ROOT]);

  // Prepare the environment block
  addenv(blk, "%s", "GATEWAY_INTERFACE=CGI/1.1");
  addenv(blk, "%s", "SERVER_PROTOCOL=HTTP/1.1");
  addenv(blk, "%s", "REDIRECT_STATUS=200"); // For PHP

  // TODO(lsm): fix this for IPv6 case
  addenv(blk, "SERVER_PORT=%d", ntohs(conn->client.lsa.sin.sin_port));

  addenv(blk, "REQUEST_METHOD=%s", conn->request_info.request_method);
  addenv(blk, "REMOTE_ADDR=%s", src_addr);
  addenv(blk, "REMOTE_PORT=%d", conn->request_info.remote_port);
  addenv(blk, "REQUEST_URI=%s", conn->request_info.uri);

  // SCRIPT_NAME: URI directory part + program basename
  assert(conn->request_info.uri[0] == '/');
  slash = strrchr(conn->request_info.uri, '/');
  if ((s = strrchr(prog, '/')) == NULL)
    s = prog;
  addenv(blk, "SCRIPT_NAME=%.*s%s", slash - conn->request_info.uri,
         conn->request_info.uri, s);

  addenv(blk, "SCRIPT_FILENAME=%s", prog);
  addenv(blk, "PATH_TRANSLATED=%s", prog);
  addenv(blk, "HTTPS=%s", conn->ssl == NULL ? "off" : "on");

  if ((s = mg_get_header(conn, "Content-Type")) != NULL)
    addenv(blk, "CONTENT_TYPE=%s", s);

  if (conn->request_info.query_string != NULL)
    addenv(blk, "QUERY_STRING=%s", conn->request_info.query_string);

  if ((s = mg_get_header(conn, "Content-Length")) != NULL)
    addenv(blk, "CONTENT_LENGTH=%s", s);

  if ((s = getenv("PATH")) != NULL)
    addenv(blk, "PATH=%s", s);

  if (conn->path_info != NULL) {
    addenv(blk, "PATH_INFO=%s", conn->path_info);
  }

#if defined(_WIN32)
  if ((s = getenv("COMSPEC")) != NULL) {
    addenv(blk, "COMSPEC=%s", s);
  }
  if ((s = getenv("SYSTEMROOT")) != NULL) {
    addenv(blk, "SYSTEMROOT=%s", s);
  }
  if ((s = getenv("SystemDrive")) != NULL) {
    addenv(blk, "SystemDrive=%s", s);
  }
#else
  if ((s = getenv("LD_LIBRARY_PATH")) != NULL)
    addenv(blk, "LD_LIBRARY_PATH=%s", s);
#endif // _WIN32

  if ((s = getenv("PERLLIB")) != NULL)
    addenv(blk, "PERLLIB=%s", s);

  if (conn->request_info.remote_user != NULL) {
    addenv(blk, "REMOTE_USER=%s", conn->request_info.remote_user);
    addenv(blk, "%s", "AUTH_TYPE=Digest");
  }

  // Add all headers as HTTP_* variables
  for (i = 0; i < conn->request_info.num_headers; i++) {
    p = addenv(blk, "HTTP_%s=%s",
        conn->request_info.http_headers[i].name,
        conn->request_info.http_headers[i].value);

    // Convert variable name into uppercase, and change - to _
    for (; *p != '=' && *p != '\0'; p++) {
      if (*p == '-')
        *p = '_';
      *p = (char) toupper(* (unsigned char *) p);
    }
  }

  // Add user-specified variables
  s = conn->ctx->config[CGI_ENVIRONMENT];
  while ((s = next_option(s, &var_vec, NULL)) != NULL) {
    addenv(blk, "%.*s", var_vec.len, var_vec.ptr);
  }

  // Terminate envp array with NULL, and the buffer with a second '\0'.
  blk->vars[blk->nvars++] = NULL;
  blk->buf[blk->len++] = '\0';

  assert(blk->nvars < (int) ARRAY_SIZE(blk->vars));
  assert(blk->len > 0);
  assert(blk->len < (int) sizeof(blk->buf));
}

// Run CGI program 'prog' for this request: spawn it with stdin/stdout
// pipes, forward the POST body if any, buffer its response headers to
// determine the status code, then relay headers and body to the client.
static void handle_cgi_request(struct mg_connection *conn, const char *prog) {
  int headers_len, data_len, i, fd_stdin[2], fd_stdout[2];
  const char *status, *status_text;
  char buf[BUFSIZ], *pbuf, dir[PATH_MAX], *p;
  struct mg_request_info ri;
  struct cgi_env_block blk;
  FILE *in, *out;
  pid_t pid;

  prepare_cgi_environment(conn, prog, &blk);

  // CGI must be executed in its own directory. 'dir' must point to the
  // directory containing executable program, 'p' must point to the
  // executable program name relative to 'dir'.
  (void) mg_snprintf(conn, dir, sizeof(dir), "%s", prog);
  if ((p = strrchr(dir, DIRSEP)) != NULL) {
    *p++ = '\0';
  } else {
    dir[0] = '.', dir[1] = '\0';
    p = (char *) prog;
  }

  pid = (pid_t) -1;
  fd_stdin[0] = fd_stdin[1] = fd_stdout[0] = fd_stdout[1] = -1;
  in = out = NULL;

  if (pipe(fd_stdin) != 0 || pipe(fd_stdout) != 0) {
    send_http_error(conn, 500, http_500_error,
        "Cannot create CGI pipe: %s", strerror(ERRNO));
    goto done;
  } else if ((pid = spawn_process(conn, p, blk.buf, blk.vars,
          fd_stdin[0], fd_stdout[1], dir)) == (pid_t) -1) {
    // spawn_process is expected to report its own error; just clean up.
    goto done;
  } else if ((in = fdopen(fd_stdin[1], "wb")) == NULL ||
      (out = fdopen(fd_stdout[0], "rb")) == NULL) {
    send_http_error(conn, 500, http_500_error,
        "fopen: %s", strerror(ERRNO));
    goto done;
  }

  setbuf(in, NULL);
  setbuf(out, NULL);

  // spawn_process() must close those!
  // If we don't mark them as closed, close() attempt before
  // return from this function throws an exception on Windows.
  // Windows does not like when closed descriptor is closed again.
  fd_stdin[0] = fd_stdout[1] = -1;

  // Send POST data to the CGI process if needed
  if (!strcmp(conn->request_info.request_method, "POST") &&
      !forward_body_data(conn, in, INVALID_SOCKET, NULL)) {
    goto done;
  }

  // Now read CGI reply into a buffer. We need to set correct
  // status code, thus we need to see all HTTP headers first.
  // Do not send anything back to client, until we buffer in all
  // HTTP headers.
  data_len = 0;
  headers_len = read_request(out, INVALID_SOCKET, NULL,
      buf, sizeof(buf), &data_len);
  if (headers_len <= 0) {
    send_http_error(conn, 500, http_500_error,
        "CGI program sent malformed HTTP headers: [%.*s]",
        data_len, buf);
    goto done;
  }
  pbuf = buf;
  buf[headers_len - 1] = '\0';
  parse_http_headers(&pbuf, &ri);

  // Make up and send the status line
  status_text = "OK";
  if ((status = get_header(&ri, "Status")) != NULL) {
    conn->request_info.status_code = atoi(status);
    status_text = status;
    // Skip past the numeric code to the reason phrase.
    while (isdigit(* (unsigned char *) status_text) ||
           *status_text == ' ') {
      status_text++;
    }
  } else if (get_header(&ri, "Location") != NULL) {
    conn->request_info.status_code = 302;
  } else {
    conn->request_info.status_code = 200;
  }
  if (get_header(&ri, "Connection") != NULL &&
      !mg_strcasecmp(get_header(&ri, "Connection"), "keep-alive")) {
    conn->must_close = 1;
  }
  (void) mg_printf(conn, "HTTP/1.1 %d %s\r\n",
                   conn->request_info.status_code, status_text);

  // Send headers
  for (i = 0; i < ri.num_headers; i++) {
    mg_printf(conn, "%s: %s\r\n",
              ri.http_headers[i].name, ri.http_headers[i].value);
  }
  (void) mg_write(conn, "\r\n", 2);

  // Send chunk of data that may be read after the headers
  conn->num_bytes_sent += mg_write(conn, buf + headers_len,
                                   (size_t)(data_len - headers_len));

  // Read the rest of CGI output and send to the client
  send_file_data(conn, out, INT64_MAX);

done:
  // NOTE(review): the child is killed unconditionally here, even after a
  // successful relay — presumably to reap misbehaving CGI programs; the
  // success path has already drained its output at this point.
  if (pid != (pid_t) -1) {
    kill(pid, SIGKILL);
  }
  if (fd_stdin[0] != -1) {
    (void) close(fd_stdin[0]);
  }
  if (fd_stdout[1] != -1) {
    (void) close(fd_stdout[1]);
  }

  if (in != NULL) {
    (void) fclose(in);
  } else if (fd_stdin[1] != -1) {
    (void) close(fd_stdin[1]);
  }

  if (out != NULL) {
    (void) fclose(out);
  } else if (fd_stdout[0] != -1) {
    (void) close(fd_stdout[0]);
  }
}
#endif // !NO_CGI

// For a given PUT path, create all intermediate subdirectories
// for given path. Return 0 if the path itself is a directory,
// or -1 on error, 1 if OK.
static int put_dir(const char *path) { char buf[PATH_MAX]; const char *s, *p; struct mgstat st; int len, res = 1; for (s = p = path + 2; (p = strchr(s, DIRSEP)) != NULL; s = ++p) { len = p - path; if (len >= (int) sizeof(buf)) { res = -1; break; } memcpy(buf, path, len); buf[len] = '\0'; // Try to create intermediate directory DEBUG_TRACE(("mkdir(%s)", buf)); if (mg_stat(buf, &st) == -1 && mg_mkdir(buf, 0755) != 0) { res = -1; break; } // Is path itself a directory? if (p[1] == '\0') { res = 0; } } return res; } static void put_file(struct mg_connection *conn, const char *path) { struct mgstat st; const char *range; int64_t r1, r2; FILE *fp; int rc; conn->request_info.status_code = mg_stat(path, &st) == 0 ? 200 : 201; if ((rc = put_dir(path)) == 0) { mg_printf(conn, "HTTP/1.1 %d OK\r\n\r\n", conn->request_info.status_code); } else if (rc == -1) { send_http_error(conn, 500, http_500_error, "put_dir(%s): %s", path, strerror(ERRNO)); } else if ((fp = mg_fopen(path, "wb+")) == NULL) { send_http_error(conn, 500, http_500_error, "fopen(%s): %s", path, strerror(ERRNO)); } else { set_close_on_exec(fileno(fp)); range = mg_get_header(conn, "Content-Range"); r1 = r2 = 0; if (range != NULL && parse_range_header(range, &r1, &r2) > 0) { conn->request_info.status_code = 206; // TODO(lsm): handle seek error (void) fseeko(fp, (off_t) r1, SEEK_SET); } if (forward_body_data(conn, fp, INVALID_SOCKET, NULL)) (void) mg_printf(conn, "HTTP/1.1 %d OK\r\n\r\n", conn->request_info.status_code); (void) fclose(fp); } } static void send_ssi_file(struct mg_connection *, const char *, FILE *, int); static void do_ssi_include(struct mg_connection *conn, const char *ssi, char *tag, int include_level) { char file_name[BUFSIZ], path[PATH_MAX], *p; FILE *fp; // sscanf() is safe here, since send_ssi_file() also uses buffer // of size BUFSIZ to get the tag. So strlen(tag) is always < BUFSIZ. 
if (sscanf(tag, " virtual=\"%[^\"]\"", file_name) == 1) { // File name is relative to the webserver root (void) mg_snprintf(conn, path, sizeof(path), "%s%c%s", conn->ctx->config[DOCUMENT_ROOT], DIRSEP, file_name); } else if (sscanf(tag, " file=\"%[^\"]\"", file_name) == 1) { // File name is relative to the webserver working directory // or it is absolute system path (void) mg_snprintf(conn, path, sizeof(path), "%s", file_name); } else if (sscanf(tag, " \"%[^\"]\"", file_name) == 1) { // File name is relative to the currect document (void) mg_snprintf(conn, path, sizeof(path), "%s", ssi); if ((p = strrchr(path, DIRSEP)) != NULL) { p[1] = '\0'; } (void) mg_snprintf(conn, path + strlen(path), sizeof(path) - strlen(path), "%s", file_name); } else { cry(conn, "Bad SSI #include: [%s]", tag); return; } if ((fp = mg_fopen(path, "rb")) == NULL) { cry(conn, "Cannot open SSI #include: [%s]: fopen(%s): %s", tag, path, strerror(ERRNO)); } else { set_close_on_exec(fileno(fp)); if (match_prefix(conn->ctx->config[SSI_EXTENSIONS], strlen(conn->ctx->config[SSI_EXTENSIONS]), path) > 0) { send_ssi_file(conn, path, fp, include_level + 1); } else { send_file_data(conn, fp, INT64_MAX); } (void) fclose(fp); } } #if !defined(NO_POPEN) static void do_ssi_exec(struct mg_connection *conn, char *tag) { char cmd[BUFSIZ]; FILE *fp; if (sscanf(tag, " \"%[^\"]\"", cmd) != 1) { cry(conn, "Bad SSI #exec: [%s]", tag); } else if ((fp = popen(cmd, "r")) == NULL) { cry(conn, "Cannot SSI #exec: [%s]: %s", cmd, strerror(ERRNO)); } else { send_file_data(conn, fp, INT64_MAX); (void) pclose(fp); } } #endif // !NO_POPEN static void send_ssi_file(struct mg_connection *conn, const char *path, FILE *fp, int include_level) { char buf[BUFSIZ]; int ch, len, in_ssi_tag; if (include_level > 10) { cry(conn, "SSI #include level is too deep (%s)", path); return; } in_ssi_tag = 0; len = 0; while ((ch = fgetc(fp)) != EOF) { if (in_ssi_tag && ch == '>') { in_ssi_tag = 0; buf[len++] = (char) ch; buf[len] = '\0'; assert(len 
<= (int) sizeof(buf)); if (len < 6 || memcmp(buf, "Bufsize /// 0--------------H...body...T--(Part A)--->Bufsize /// T--(Part B)----H...body....ccccccccccccc>Bufsize /// 0cccccccccccT--H...body....ccccccccccccc>Bufsize // First we copy the contents into Part A std::streamsize firstcopy = std::min(clen, bufsize - tail); memcpy(buffer + tail, c, (size_t)firstcopy); // Move the tail to the end tail += firstcopy; // If tail moved to the end wrap around if (tail == bufsize) tail = 0; // If the copy is not complete if (firstcopy < clen) { // Assert: This only happens on wrape around ASSERT_EQ(tail, 0); // Determine what is left to be coppied std::streamsize secondcopy = clen - firstcopy; ASSERT_GT(secondcopy, 0); // Do the copy and advance the pointer memcpy(buffer, c + firstcopy, (size_t)secondcopy); tail += secondcopy; } return clen; } std::streamsize safe_circular_char_buffer:: introspective_read(char* &s, std::streamsize clen) { ASSERT_GT(clen,0); // early termination check if(empty() || clen == 0) { s = NULL; return 0; } const std::streamsize curtail = tail; s = buffer + head; // how much we do read? we can go up to the end of the requested // size or until a looparound // case 1: no looparound |------H......T-----> // case 2: looparound |...T--H............> std::streamsize available_readlen = 0; const bool loop_around(curtail < head); if (loop_around) available_readlen = bufsize - head; else available_readlen = curtail - head; ASSERT_GE(available_readlen, 0); const std::streamsize actual_readlen = std::min(available_readlen, clen); ASSERT_GT(actual_readlen, 0); return actual_readlen; } std::streamsize safe_circular_char_buffer:: blocking_introspective_read(char* &s, std::streamsize clen) { // try to read std::streamsize ret = introspective_read(s, clen); if (ret != 0) return ret; // if read failed. 
acquire the lock and try again while(1) { iswaiting = true; mut.lock(); while (empty() && !done) cond.wait(mut); iswaiting = false; mut.unlock(); std::streamsize ret = introspective_read(s, clen); if (ret != 0) return ret; if (done) return 0; } } void safe_circular_char_buffer:: advance_head(const std::streamsize advance_len) { ASSERT_GE(advance_len, 0); ASSERT_LE(advance_len, size()); // advance the head forward as far as possible head += advance_len; // If head wraps around move head to begginning and then offset if (head >= bufsize) head -= bufsize; } // end of advance head } // end of namespace ================================================ FILE: src/graphlab/util/safe_circular_char_buffer.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef SAFE_CIRCULAR_CHAR_BUFFER_HPP #define SAFE_CIRCULAR_CHAR_BUFFER_HPP #include #include #include namespace graphlab { /** \ingroup util A non-resizing circular char buffer with thread-safe write operations and a single reader */ class safe_circular_char_buffer { public: safe_circular_char_buffer(std::streamsize bufsize = 10485760 /*10 MB */); ~safe_circular_char_buffer(); /** * Stops the buffer and signals any blocking calls. 
*/ void stop_reader(); /** * Determine if the buffer is empty */ bool empty() const; inline bool is_done() const { return done; } inline bool reader_is_blocked() const { return iswaiting; } /** * Get the total contents currently stored in the buffer. */ std::streamsize size() const; /** * Get the amount of free space reamining in the buffer */ std::streamsize free_space() const; /** Gets the size of the buffer. \note: The useable space is reserved_size() - 1 */ inline std::streamsize reserved_size() const { return bufsize - 1; } /** * Returns 0 if the write doesn't fit * * This function acquires the critical section * to perform the write */ std::streamsize write(const char* c, std::streamsize clen); /** * Returns 0 if the write doesn't fit * * This does the same as write(), but does not acquire the critical * section. The caller should ensure safety */ std::streamsize write_unsafe(const char* c, std::streamsize clen); /** * Returns a pointer (through s) and a length of the read. This * pointer is a direct pointer into the internal buffer of this * datastructure. The pointer is valid as long as no other * operations are performed on this structure. The length of the * introspective_read may be less than the number of bytes * requested. Multiple calls to introspective_read may be necessary * to read all data in the buffer. If the function returns 0, the * buffer is empty. * * No locks are acquired on this call. */ std::streamsize introspective_read(char* &s, std::streamsize clen); /** * Same as introspective read. But blocks until there is something to read * This function does not acquire a critical section. 
*/ std::streamsize blocking_introspective_read(char* &s, std::streamsize clen); void advance_head(const std::streamsize advance_len); /** When begin critical section returns, it is guaranteed that no other writer will be touching the tail of the queue */ inline void begin_critical_section() { mut.lock(); } /** Releases a critical section acquired by begin_critical_section */ inline void end_critical_section() { mut.unlock(); } /** Releases a critical section acquired by begin_critical_section, and signals the reader to begin reading if the reader is blocked */ inline void end_critical_section_with_signal() { cond.signal(); mut.unlock(); } private: char* buffer; std::streamsize bufsize; // current size of the buffer /** * points to the head of the queue. Reader reads from here */ std::streamsize head; /** * points to one past the end of the queue. writer writes to * here. if tail == head, buffer must be empty */ std::streamsize tail; mutex mut; conditional cond; volatile bool done; // Once volatile bool iswaiting; }; } #endif ================================================ FILE: src/graphlab/util/small_map.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SMALL_MAP_HPP #define GRAPHLAB_SMALL_MAP_HPP #include #include #include #include #include #include #include #include namespace graphlab { template class small_map { template< size_t T1, size_t T2 > struct max_type { enum max_value { value = T1 < T2? T2 : T1 }; }; struct less { bool operator()(const std::pair& a, const std::pair& b) const { return a.first < b.first; } }; public: typedef small_set< MAX_DIM, std::pair, less > small_set_type; typedef typename small_set_type::value_type value_type; typedef typename small_set_type::iterator iterator; typedef typename small_set_type::const_iterator const_iterator; typedef KeyT key_type; typedef ValueT mapped_type; public: //! construct an empty map small_map() { } //! Construct a map with a single element small_map(const key_type& key, const mapped_type& value) : set(std::make_pair(key, value)) { } //! Get the begin iterator inline iterator begin() { return set.begin(); } //! get the end iterator inline iterator end() { return set.end(); } //! Get the begin iterator inline const_iterator begin() const { return set.begin(); } //! Get the end iterator inline const_iterator end() const { return set.end(); } //! get the size of the set inline size_t size() const { return set.size(); } //! determine if there are any elements in the set inline bool empty() const { return set.empty(); } //! test whether the set contains the given element bool contains(const value_type& pair) const { return set.contains(pair); } //! test whether the set contains the given element bool contains(const key_type& key) const { const_iterator iter = std::lower_bound(set.begin(), set.end(), std::make_pair(key, mapped_type())); return (iter != set.end()) && (iter->first == key); } //! test whether the set has the given key inline bool has_key(const key_type& key) { return contains(key); } //! 
test whether the set contains the given set of element template bool contains(const small_map& other) const { return set.contains(other.set); } template bool operator==(const small_map& other) const { return set == other.set; } //! Lookup an element in the map inline const mapped_type& operator[](const key_type& key) const { value_type* const ptr = std::lower_bound(set.begin(), set.end(), std::make_pair(key, mapped_type()), less()); ASSERT_NE(ptr, set.end()); ASSERT_TURE(ptr->first == key); return ptr->second; } //! Lookup an element in the map inline mapped_type& operator[](const key_type& key) { value_type* ptr = std::lower_bound(set.begin(), set.end(), std::make_pair(key, mapped_type()), less()); if(ptr != end() && (key == ptr->first) ) { return ptr->second; } // Add the entry to the map set += std::make_pair(key, ValueT()); ptr = std::lower_bound(set.begin(), set.end(), std::make_pair(key, mapped_type()), less()); ASSERT_NE(ptr, set.end()); ASSERT_TRUE(ptr->first == key); return ptr->second; } inline mapped_type& safe_find(const key_type& key) { value_type* const ptr = std::lower_bound(set.begin(), set.end(), std::make_pair(key, mapped_type()), less()); ASSERT_NE(ptr, set.end()); ASSERT_TRUE(ptr->first == key); return ptr->second; } //! 
Take the union of two maps template inline small_map::value, KeyT, ValueT> operator+(const small_map& other) const { typedef small_map::value, KeyT, ValueT > result_type; result_type result; result.set = set + other.set; return result; } private: small_set_type set; }; // end of small map template std::ostream& operator<<(std::ostream& out, const graphlab::small_map& map) { typedef std::pair pair_type; size_t index = 0; out << '{'; foreach(const pair_type& pair, map) { out << pair.first << "->" << pair.second; if(++index < map.size()) out << ", "; } return out << '}'; } }; // end graphlab #include #endif ================================================ FILE: src/graphlab/util/small_set.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SMALL_SET_HPP #define GRAPHLAB_SMALL_SET_HPP #include #include #include #include #include #include #include namespace graphlab { /** * This class implements a dense set of fixed maximum size which * support quick operations with stack allocation. 
*/ template > class small_set { public: // typedefs typedef T value_type; typedef T& reference; typedef const T& const_reference; typedef ptrdiff_t difference_type; typedef size_t size_type; typedef T* iterator; typedef const T* const_iterator; enum sizes {max_dim_type = MAX_DIM }; template< size_t T1, size_t T2 > struct max_type { enum max_value { value = T1 < T2? T2 : T1 }; }; struct Equals { inline bool operator()(const T& a, const T& b) const { return !Less()(a,b) && !Less()(b,a); } }; // end of equals public: //! Construct an empty set small_set() : nelems(0) { } //! Create a stack set with just one element small_set(const T& elem) : nelems(1) { values[0] = elem; } /** * Create a stack from an std set by adding each element one at a * time */ template small_set(const std::set& other) : nelems(other.size()) { ASSERT_LE(nelems, MAX_DIM); size_t index = 0; foreach(const OtherT& elem, other) values[index++] = elem; } /** * Create a stack from an std set by adding each element one at a * time */ template small_set(const small_set& other) : nelems(other.size()) { ASSERT_LE(nelems, MAX_DIM); size_t index = 0; for(const T* elem = other.begin(); elem != other.end(); ++elem) values[index++] = *elem; } //! Get the begin iterator inline T* begin() { return values; } //! get the end iterator inline T* end() { return values + nelems; } //! Get the begin iterator inline const T* begin() const { return values; } //! Get the end iterator inline const T* end() const { return values + nelems; } //! get the size of the set inline size_t size() const { return nelems; } //! determine if there are any elements in the set inline bool empty() const { return size() == 0; } //! test whether the set contains the given element bool contains(const T& elem) const { return std::binary_search(begin(), end(), elem, Less()); } //! 
test whether the set contains the given set of element template bool contains(const small_set& other) const { return std::includes(begin(), end(), other.begin(), other.end(), Less()); } /** * Test if this set is contained in other. If so this returns * true. */ template bool operator<=(const small_set& other) const { return other.contains(*this); } /** * Test if this set is contained in other. If so this returns * true. */ template bool operator<(const small_set& other) const { return other.contains(*this) && size() < other.size(); } template bool operator==(const small_set& other) const { if(size() != other.size()) return false; return std::equal(begin(), end(), other.begin(), Equals()); } //! insert an element into this set inline void insert(const T& elem) { *this += elem; } //! insert a range of elements into this set inline void insert(const T* begin, const T* end) { // Ensure that other size is not negative ASSERT_LE(begin, end); // Ensure that the other set has an appropriate size const size_t other_size = end - begin; ASSERT_LE(other_size, MAX_DIM); // Construct a temporary small set representing the range small_set other; for(size_t i = 0; i < other_size; ++i) { other[i] = begin[i]; // Ensure that the other set is sorted if(i+1 < other_size) ASSERT_LT(begin[i], begin[i+1]); } // Insert it into this small set using the + operation *this += other; } //! remove an element from the set void erase(const T& elem) { *this -= elem; } //! get the element at a particular location virtual const T& operator[](size_t index) const { ASSERT_LT(index, nelems); return values[index]; } // //! 
Take the union of two sets // inline small_set operator+(const small_set& other) const { // small_set result; // size_t i = 0, j = 0; // while(i < size() && j < other.size()) { // assert(result.nelems < MAX_DIM); // if(values[i] < other.values[j]) // This comes first // result.values[result.nelems++] = values[i++]; // else if (values[i] > other.values[j]) // other comes first // result.values[result.nelems++] = other.values[j++]; // else { // both are same // result.values[result.nelems++] = values[i++]; j++; // } // } // // finish writing this // while(i < size()) { // assert(result.nelems < MAX_DIM); // result.values[result.nelems++] = values[i++]; // } // // finish writing other // while(j < other.size()) { // assert(result.nelems < MAX_DIM); // result.values[result.nelems++] = other.values[j++]; // } // return result; // } //! Take the union of two sets inline small_set operator+(const T& elem) const { small_set result(*this); return result += elem; } //! Take the union of two sets template inline small_set< max_type::value, T, Less> operator+(const small_set& other) const { typedef small_set< max_type::value, T, Less> result_type; result_type result; const T* new_end = std::set_union(begin(), end(), other.begin(), other.end(), safe_iterator(result.begin(), result.absolute_end()), Less()).begin; result.nelems = new_end - result.begin(); ASSERT_LE(result.nelems, result_type::max_dim_type); return result; } //! Add the other set to this set template inline small_set& operator+=(const small_set& other) { *this = *this + other; return *this; } //! Add an element to this set. This is "optimized" since it is //! 
used frequently inline small_set& operator+=(const T& elem) { // // Find where elem should be inserted // size_t index = 0; // for(; index < nelems && values[index] < elem; ++index); T* ptr(std::lower_bound(begin(), end(), elem, Less())); // if the element already exists return if(ptr != end() && !(elem < *ptr) ) return *this; // otherwise the element does not exist so add it at the current // location and increment the number of elements T tmp(elem); nelems++; // advances end ASSERT_LE(nelems, MAX_DIM); // Insert the element at index swapping out the rest of the // array for(; ptr < end(); ++ptr) std::swap(*ptr, tmp); // Finished return return *this; } //! Remove the other set from this set template small_set& operator-=(const small_set& other) { // if(other.size() == 0) return *this; // // Backup the old nelems and reset nelems // size_t old_nelems = size(); nelems = 0; // for(size_t i = 0, j = 0; i < old_nelems; ++i ) { // // advance the other index // for( ; j < other.size() && values[i] > other.values[j]; ++j); // // otherwise check equality or move forward // if(j >= other.size() || (values[i] != other.values[j])) // values[nelems++] = values[i]; // } // ASSERT_LE(nelems, MAX_DIM); *this = *this - other; return *this; } //! subtract the right set form the left set template small_set operator-(const small_set& other) const { // small_set result = *this; // result -= other; small_set result; T* const new_end = std::set_difference(begin(), end(), other.begin(), other.end(), safe_iterator(result.begin(), result.absolute_end()), Less()).begin; result.nelems = new_end - result.begin(); ASSERT_LE(result.nelems, MAX_DIM); return result; } //! 
Take the intersection of two sets template small_set operator*(const small_set& other) const { small_set result; const T* new_end = std::set_intersection(begin(), end(), other.begin(), other.end(), safe_iterator(result.begin(), result.absolute_end()), Less()).begin; result.nelems = new_end - result.end(); ASSERT_LE(result.nelems, MAX_DIM); return result; } //! Take the intersection of two sets template small_set operator*=(const small_set& other) { *this = *this * other; return *this; } //! Load the set form file void load(iarchive& arc) { arc >> nelems; assert(nelems <= MAX_DIM); for(size_t i = 0; i < nelems; ++i) { arc >> values[i]; if( i > 0 ) assert(values[i] > values[i-1]); } } //! Save the set to file void save(oarchive& arc) const { arc << nelems; for(size_t i = 0; i < nelems; ++i) arc << values[i]; } private: size_t nelems; T values[MAX_DIM]; //! get the end iterator to the complete range inline T* absolute_end() { return values + MAX_DIM; } struct safe_iterator : public std::iterator { T* begin; const T* end; safe_iterator(const safe_iterator& other) : begin(other.begin), end(other.end) { } safe_iterator(T* begin, const T* end) : begin(begin), end(end) { ASSERT_NE(begin, NULL); ASSERT_NE(end, NULL); ASSERT_LE(begin, end); } inline safe_iterator& operator++() { ++begin; return *this; } inline safe_iterator& operator++(int) { safe_iterator tmp(*this); operator++(); return tmp; } inline bool operator==(const safe_iterator& other) { ASSERT_EQ(end, other.end); return begin == other.begin; } inline bool operator!=(const safe_iterator& other) { return !operator==(other); } T& operator*() { ASSERT_LT(begin, end); return *begin; } }; }; // end of small_set template std::ostream& operator<<(std::ostream& out, const graphlab::small_set& set) { out << "{"; for(size_t i = 0; i < set.size(); ++i) { out << set[i]; if(i + 1 < set.size()) out << ", "; } out << "}"; return out; } }; // end of graphlab namespace #include #endif 
================================================ FILE: src/graphlab/util/stl_util.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ // Probabilistic Reasoning Library (PRL) // Copyright 2009 (see AUTHORS.txt for a list of contributors) // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #ifndef GRAPHLAB_STL_UTIL_HPP #define GRAPHLAB_STL_UTIL_HPP #include #include #include #include #include #include #include #include #include // #include // #include // #include #include namespace graphlab { // Functions on sets //============================================================================ /** * computes the union of two sets. */ template std::set set_union(const std::set& a, const std::set& b) { std::set output; std::set_union(a.begin(), a.end(), b.begin(), b.end(), std::inserter(output, output.begin())); return output; } template std::set set_union(const std::set& a, const T& b) { std::set output = a; output.insert(b); return output; } template std::set set_intersect(const std::set& a, const std::set& b) { std::set output; std::set_intersection(a.begin(), a.end(), b.begin(), b.end(), std::inserter(output, output.begin())); return output; } template std::set set_difference(const std::set& a, const std::set& b) { std::set output; std::set_difference(a.begin(), a.end(), b.begin(), b.end(), std::inserter(output, output.begin())); return output; } template std::set set_difference(const std::set& a, const T& b) { std::set output = a; output.erase(b); return output; } //! 
@return 2 sets: template std::pair,std::set > set_partition(const std::set& s, const std::set& partition) { std::set a, b; a = set_intersect(s, partition); b = set_difference(s, partition); return std::make_pair(a, b); } template bool set_disjoint(const std::set& a, const std::set& b) { return (intersection_size(a,b) == 0); } template bool set_equal(const std::set& a, const std::set& b) { if (a.size() != b.size()) return false; return a == b; // defined in } template bool includes(const std::set& a, const std::set& b) { return std::includes(a.begin(), a.end(), b.begin(), b.end()); } /** * Returns true if $a \subseteq b$ */ template bool is_subset(const std::set& a, const std::set& b) { return includes(b, a); } template bool is_superset(const std::set& a,const std::set& b) { return includes(a, b); } //! Writes a human representation of the set to the supplied stream. template std::ostream& operator<<(std::ostream& out, const std::set& s) { return print_range(out, s, '{', ' ', '}'); } // Functions on maps //============================================================================ /** * constant lookup in a map. assertion failure of key not found in map */ template const T& safe_get(const std::map& map, const Key& key) { typedef typename std::map::const_iterator iterator; iterator iter = map.find(key); ASSERT_TRUE(iter != map.end()); return iter->second; } // end of safe_get /** * constant lookup in a map. If key is not found in map, * 'default_value' is returned. Note that this can't return a reference * and must return a copy */ template const T safe_get(const std::map& map, const Key& key, const T default_value) { typedef typename std::map::const_iterator iterator; iterator iter = map.find(key); if (iter == map.end()) return default_value; else return iter->second; } // end of safe_get /** * Transform each key in the map using the key_map * transformation. 
The resulting map will have the form * output[key_map[i]] = map[i] */ template std::map rekey(const std::map& map, const std::map& key_map) { std::map output; typedef std::pair pair_type; foreach(const pair_type& pair, map) { output[safe_get(key_map, pair.first)] = pair.second; } return output; } /** * Transform each key in the map using the key_map * transformation. The resulting map will have the form output[i] = remap[map[i]] */ template std::map remap(const std::map& map, const std::map& val_map) { std::map output; typedef std::pair pair_type; foreach(const pair_type& pair, map) { output[pair.first] = safe_get(val_map, pair.second); } return output; } /** * Inplace version of remap */ template void remap(std::map& map, const std::map& val_map) { typedef std::pair pair_type; foreach(pair_type& pair, map) { pair.second = safe_get(val_map, pair.second); } } /** * Computes the union of two maps */ template std::map map_union(const std::map& a, const std::map& b) { // Initialize the output map std::map output; std::set_union(a.begin(), a.end(), b.begin(), b.end(), std::inserter(output, output.begin()), output.value_comp()); return output; } /** * Computes the intersection of two maps */ template std::map map_intersect(const std::map& a, const std::map& b) { // Initialize the output map std::map output; // compute the intersection std::set_intersection(a.begin(), a.end(), b.begin(), b.end(), std::inserter(output, output.begin()), output.value_comp()); return output; } /** * Returns the entries of a map whose keys show up in the set keys */ template std::map map_intersect(const std::map& m, const std::set& keys) { std::map output; foreach(const Key& key, keys) { typename std::map::const_iterator it = m.find(key); if (it != m.end()) output[key] = it->second; } return output; } /** * Computes the difference between two maps */ template std::map map_difference(const std::map& a, const std::map& b) { // Initialize the output map std::map output; // compute the 
intersection std::set_difference(a.begin(), a.end(), b.begin(), b.end(), std::inserter(output, output.begin()), output.value_comp()); return output; } /** * Returns the set of keys in a map */ template std::set keys(const std::map& map) { std::set output; typedef std::pair pair_type; foreach(const pair_type& pair, map) { output.insert(pair.first); } return output; } /** * Get teh set of keys in a map as a vector */ template std::vector keys_as_vector(const std::map& map) { std::vector output(map.size()); typedef std::pair pair_type; size_t i = 0; foreach(const pair_type& pair, map) { output[i++] = pair.first; } return output; } /** * Gets the values from a map */ template std::set values(const std::map& map) { std::set output; typedef std::pair pair_type; foreach(const pair_type& pair, map) { output.insert(pair.second); } return output; } template std::vector values(const std::map& m, const std::set& keys) { std::vector output; foreach(const Key &i, keys) { output.push_back(safe_get(m, i)); } return output; } template std::vector values(const std::map& m, const std::vector& keys) { std::vector output; foreach(const Key &i, keys) { output.push_back(safe_get(m, i)); } return output; } //! Creates an identity map (a map from elements to themselves) template std::map make_identity_map(const std::set& keys) { std::map m; foreach(const Key& key, keys) m[key] = key; return m; } //! Writes a map to the supplied stream. 
template std::ostream& operator<<(std::ostream& out, const std::map& m) { out << "{"; for (typename std::map::const_iterator it = m.begin(); it != m.end();) { out << it->first << "-->" << it->second; if (++it != m.end()) out << " "; } out << "}"; return out; } /** Removes white space (space and tabs) from the beginning and end of str, returning the resultant string */ inline std::string trim(const std::string& str) { std::string::size_type pos1 = str.find_first_not_of(" \t"); std::string::size_type pos2 = str.find_last_not_of(" \t"); return str.substr(pos1 == std::string::npos ? 0 : pos1, pos2 == std::string::npos ? str.size()-1 : pos2-pos1+1); } /** * Convenience function for using std streams to convert anything to a string */ template std::string tostr(const T& t) { std::stringstream strm; strm << t; return strm.str(); } /** * Convenience function for using std streams to convert a string to anything */ template T fromstr(const std::string& str) { std::stringstream strm(str); T elem; strm >> elem; ASSERT_FALSE(strm.fail()); return elem; } /** Returns a string representation of the number, padded to 'npad' characters using the pad_value character */ inline std::string pad_number(const size_t number, const size_t npad, const char pad_value = '0') { std::stringstream strm; strm << std::setw((int)npad) << std::setfill(pad_value) << number; return strm.str(); } // inline std::string change_suffix(const std::string& fname, // const std::string& new_suffix) { // size_t pos = fname.rfind('.'); // assert(pos != std::string::npos); // const std::string new_base(fname.substr(0, pos)); // return new_base + new_suffix; // } // end of change_suffix /** Using splitchars as delimiters, splits the string into a vector of strings. if auto_trim is true, trim() is called on all the extracted strings before returning. 
*/ inline std::vector strsplit(const std::string& str, const std::string& splitchars, const bool auto_trim = false) { std::vector tokens; for(size_t beg = 0, end = 0; end != std::string::npos; beg = end+1) { end = str.find_first_of(splitchars, beg); if(auto_trim) { if(end - beg > 0) { std::string tmp = trim(str.substr(beg, end - beg)); if(!tmp.empty()) tokens.push_back(tmp); } } else tokens.push_back(str.substr(beg, end - beg)); } return tokens; // size_t pos = 0; // while(1) { // size_t nextpos = s.find_first_of(splitchars, pos); // if (nextpos != std::string::npos) { // ret.push_back(s.substr(pos, nextpos - pos)); // pos = nextpos + 1; // } else { // ret.push_back(s.substr(pos)); // break; // } // } // return ret; } }; // end of namespace graphlab #include #endif ================================================ FILE: src/graphlab/util/synchronized_unordered_map.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SYNCHRONIZED_UNORDERED_MAP #define GRAPHLAB_SYNCHRONIZED_UNORDERED_MAP #include #include #include namespace graphlab { /// \ingroup util_internal template class synchronized_unordered_map { public: typedef boost::unordered_map container; typedef typename container::iterator iterator; typedef typename container::const_iterator const_iterator; typedef std::pair datapointer; typedef std::pair const_datapointer; typedef Data value_type; typedef size_t key_type; private: std::vector data; std::vector lock; size_t nblocks; public: synchronized_unordered_map(size_t numblocks):data(numblocks), lock(numblocks), nblocks(numblocks) { for (size_t i = 0;i < numblocks; ++i) { data[i].max_load_factor(1.0); } } std::pair find(size_t key) { size_t b = key % nblocks; lock[b].readlock(); iterator iter = data[b].find(key); std::pair ret = std::make_pair(iter != data[b].end(), &(iter->second)); lock[b].rdunlock(); return ret; } /** return std::pair if not found, iterator is invalid */ std::pair find(size_t key) const { size_t b = key % nblocks; lock[b].readlock(); const_iterator iter = data[b].find(key); std::pair ret = std::make_pair(iter != data[b].end(), &(iter->second)); lock[b].rdunlock(); return ret; } // care must be taken that you do not access an erased iterator void erase(size_t key) { size_t b = key % nblocks; lock[b].writelock(); data[b].erase(key); lock[b].wrunlock(); } template void erase_if(size_t key, Predicate pred) { size_t b = key % nblocks; lock[b].writelock(); iterator iter = data[b].find(key); if (iter != data[b].end() && pred(iter->second)) data[b].erase(key); lock[b].wrunlock(); } value_type& insert(size_t key, const value_type &val) { size_t b = key % nblocks; lock[b].writelock(); data[b][key] = val; value_type& ret = data[b][key]; lock[b].wrunlock(); return ret; } /** returns std::pair on success, iterator will point to the entry on failure, iterator will point to 
an existing entry */ std::pair insert_with_failure_detect(size_t key, const value_type &val) { std::pair ret ; size_t b = key % nblocks; lock[b].writelock(); //search for it iterator iter = data[b].find(key); // if it not in the table, write and return if (iter == data[b].end()) { data[b][key] = val; ret = std::make_pair(true, &(data[b].find(key)->second)); } else { ret = std::make_pair(false, &(iter->second)); } lock[b].wrunlock(); return ret; } void clear() { for (size_t i = 0;i < data.size(); ++i) { data[i].clear(); } } }; } #endif ================================================ FILE: src/graphlab/util/synchronized_unordered_map2.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef SYNCHRONIZED_UNORDERED_MAP2 #define SYNCHRONIZED_UNORDERED_MAP2 #include #include #include namespace graphlab { /* \ingroup util_internal An alternate form of the synchronized unordered map, built around the use of critical sections */ template class synchronized_unordered_map2 { public: typedef boost::unordered_map container; typedef typename container::iterator iterator; typedef typename container::const_iterator const_iterator; typedef std::pair datapointer; typedef std::pair const_datapointer; typedef Data value_type; typedef size_t key_type; private: std::vector data; std::vector lock; size_t nblocks; public: synchronized_unordered_map2(size_t numblocks):data(numblocks), lock(numblocks), nblocks(numblocks) { for (size_t i = 0;i < numblocks; ++i) { data[i].max_load_factor(1.0); } } std::pair find(size_t key) { size_t b = key % nblocks; iterator iter = data[b].find(key); std::pair ret = std::make_pair(iter != data[b].end(), &(iter->second)); return ret; } /** return std::pair if not found, iterator is invalid */ std::pair find(size_t key) const { size_t b = key % nblocks; const_iterator iter = data[b].find(key); std::pair ret = std::make_pair(iter != data[b].end(), &(iter->second)); return ret; } // care must be taken that you do not access an erased iterator void erase(size_t key) { size_t b = key % nblocks; data[b].erase(key); } template void erase_if(size_t key, Predicate pred) { size_t b = key % nblocks; iterator iter = data[b].find(key); if (iter != data[b].end() && pred(iter->second)) data[b].erase(key); } value_type& insert(size_t key, const value_type &val) { size_t b = key % nblocks; data[b][key] = val; value_type& ret = data[b][key]; return ret; } void read_critical_section(size_t key) { size_t b = key % nblocks; lock[b].readlock(); } void write_critical_section(size_t key) { size_t b = key % nblocks; lock[b].writelock(); } void release_critical_section(size_t key) 
{ size_t b = key % nblocks; lock[b].unlock(); } /** returns std::pair on success, iterator will point to the entry on failure, iterator will point to an existing entry */ std::pair insert_with_failure_detect(size_t key, const value_type &val) { std::pair ret ; size_t b = key % nblocks; //search for it iterator iter = data[b].find(key); // if it not in the table, write and return if (iter == data[b].end()) { data[b][key] = val; ret = std::make_pair(true, &(data[b].find(key)->second)); } else { ret = std::make_pair(false, &(iter->second)); } return ret; } void clear() { for (size_t i = 0;i < data.size(); ++i) { data[i].clear(); } } }; } #endif ================================================ FILE: src/graphlab/util/system_usage.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_SYSTEM_USAGE_HPP #define GRAPHLAB_SYSTEM_USAGE_HPP #include namespace graphlab { }; #endif ================================================ FILE: src/graphlab/util/timer.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include std::ostream& operator<<(std::ostream& out, const graphlab::timer& t) { return out << t.current_time(); } namespace graphlab { void alarm_wakeup(int i); class hundredms_timer { public: hundredms_timer() { stop = false; tout_val.it_interval.tv_sec = 0; tout_val.it_interval.tv_usec = 0; tout_val.it_value.tv_sec = 0; tout_val.it_value.tv_usec = 50000; signal(SIGALRM,alarm_wakeup); /* set the Alarm signal capture */ setitimer(ITIMER_REAL, &tout_val,0); ti.start(); } size_t ctr; timer ti; struct itimerval tout_val; bool stop; ~hundredms_timer() { stop = true; signal(SIGALRM, SIG_IGN); } }; hundredms_timer hmstimer; void alarm_wakeup(int i) { if (hmstimer.stop) return; signal(SIGALRM,alarm_wakeup); // compute the desired time till the next 100ms tick by using a real timer call double realtime = hmstimer.ti.current_time() ; // round down hmstimer.ctr = (size_t)(realtime * 10); setitimer(ITIMER_REAL, &(hmstimer.tout_val), 0); } /** * Precision of deciseconds */ float timer::approx_time_seconds() { return float(hmstimer.ctr) / 10; } /** * Precision of deciseconds */ size_t timer::approx_time_millis() { return hmstimer.ctr * 100; } void timer::sleep(size_t sleeplen) { struct timespec timeout; timeout.tv_sec = sleeplen; timeout.tv_nsec = 0; while (nanosleep(&timeout, &timeout) == -1); } /** Sleeps for sleeplen milliseconds. 
*/ void timer::sleep_ms(size_t sleeplen) { struct timespec timeout; timeout.tv_sec = sleeplen / 1000; timeout.tv_nsec = (sleeplen % 1000) * 1000000; while (nanosleep(&timeout, &timeout) == -1); } static unsigned long long rtdsc_ticks_per_sec = 0; static mutex rtdsc_ticks_per_sec_mutex; unsigned long long estimate_ticks_per_second() { if (rtdsc_ticks_per_sec == 0) { rtdsc_ticks_per_sec_mutex.lock(); if (rtdsc_ticks_per_sec == 0) { unsigned long long tstart = rdtsc(); graphlab::timer::sleep(1); unsigned long long tend = rdtsc(); rtdsc_ticks_per_sec = tend - tstart; } rtdsc_ticks_per_sec_mutex.unlock(); } return rtdsc_ticks_per_sec; } } ================================================ FILE: src/graphlab/util/timer.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_TIMER_HPP #define GRAPHLAB_TIMER_HPP #include #include #include namespace graphlab { /** * \ingroup util * * \brief A simple class that can be used for benchmarking/timing up * to microsecond resolution. * * Standard Usage * ================= * * The timer is used by calling \ref graphlab::timer::start and then * by getting the current time since start by calling * \ref graphlab::timer::current_time. 
* * For example: * * \code * #include * * * graphlab::timer timer; * timer.start(); * // do something * std::cout << "Elapsed time: " << timer.current_time() << std::endl; * \endcode * * Fast approximate time * ==================== * * Calling current item in a tight loop can be costly and so we * provide a faster less accurate timing primitive which reads a * local time variable that is updated roughly every 100 millisecond. * These are the \ref graphlab::timer::approx_time_seconds and * \ref graphlab::timer::approx_time_millis. */ class timer { private: /** * \brief The internal start time for this timer object */ timeval start_time_; public: /** * \brief The timer starts on construction but can be restarted by * calling \ref graphlab::timer::start. */ inline timer() { start(); } /** * \brief Reset the timer. */ inline void start() { gettimeofday(&start_time_, NULL); } /** * \brief Returns the elapsed time in seconds since * \ref graphlab::timer::start was last called. * * @return time in seconds since \ref graphlab::timer::start was called. */ inline double current_time() const { timeval current_time; gettimeofday(¤t_time, NULL); double answer = // (current_time.tv_sec + ((double)current_time.tv_usec)/1.0E6) - // (start_time_.tv_sec + ((double)start_time_.tv_usec)/1.0E6); (double)(current_time.tv_sec - start_time_.tv_sec) + ((double)(current_time.tv_usec - start_time_.tv_usec))/1.0E6; return answer; } // end of current_time /** * \brief Returns the elapsed time in milliseconds since * \ref graphlab::timer::start was last called. * * @return time in milliseconds since \ref graphlab::timer::start was called. */ inline double current_time_millis() const { return current_time() * 1000; } /** * \brief Get the number of seconds (as a floating point value) * since the Unix Epoch. 
*/ static double sec_of_day() { timeval current_time; gettimeofday(¤t_time, NULL); double answer = (double)current_time.tv_sec + ((double)current_time.tv_usec)/1.0E6; return answer; } // end of sec_of_day /** * \brief Returns only the micro-second component of the * time since the Unix Epoch. */ static size_t usec_of_day() { timeval current_time; gettimeofday(¤t_time, NULL); size_t answer = (size_t)current_time.tv_sec * 1000000 + (size_t)current_time.tv_usec; return answer; } // end of usec_of_day /** * \brief Returns the time since program start. * * This value is only updated once every 100ms and is therefore * approximate (but fast). */ static float approx_time_seconds(); /** * \brief Returns the time since program start. * * This value is only updated once every 100ms and is therefore * approximate (but fast). */ static size_t approx_time_millis(); /** * Sleeps for sleeplen seconds */ static void sleep(size_t sleeplen); /** * Sleeps for sleeplen milliseconds. */ static void sleep_ms(size_t sleeplen); }; // end of Timer unsigned long long estimate_ticks_per_second(); #if defined(__i386__) static inline unsigned long long rdtsc(void) { unsigned long long int x; __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); return x; } #elif defined(__x86_64__) static inline unsigned long long rdtsc(void) { unsigned hi, lo; __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); return ( (unsigned long long)lo) | ( ((unsigned long long)hi)<<32 ); } #else static inline unsigned long long rdtsc(void) { return 0; } #endif } // end of graphlab namespace /** * Convenience function. Allows you to call "cout << ti" where ti is * a timer object and it will print the number of seconds elapsed * since ti.start() was called. */ std::ostream& operator<<(std::ostream& out, const graphlab::timer& t); #endif ================================================ FILE: src/graphlab/util/tracepoint.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. 
* All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include namespace graphlab { void trace_count::print(std::ostream& out, unsigned long long tpersec) const { if (tpersec == 0) { out << name << ": " << description << "\n"; out << "Events:\t" << count.value << "\n"; out << "Total:\t" << total.value << "ticks \n"; if (count.value > 0) { out << "Mean:\t" << (double)total.value / count.value << "ticks \n"; out << "Min:\t" << minimum << "ticks \n"; out << "Max:\t" << maximum << "ticks \n"; } } else { double tperms = (double)tpersec / 1000; out << name << ": " << description << "\n"; out << "Events:\t" << count.value << "\n"; out << "Total:\t" << (double)total.value / tperms << " ms \n"; if (count.value > 0) { out << "Mean:\t" << (double)total.value / count.value / tperms << " ms \n"; out << "Min:\t" << (double)minimum / tperms << " ms \n"; out << "Max:\t" << (double)maximum / tperms << " ms \n"; } } } static mutex printlock; trace_count::~trace_count() { #ifdef USE_TRACEPOINT printlock.lock(); print(std::cout, estimate_ticks_per_second()); std::cout.flush(); printlock.unlock(); #endif } } // namespace graphlab ================================================ FILE: src/graphlab/util/tracepoint.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_UTIL_TRACEPOINT_HPP #define GRAPHLAB_UTIL_TRACEPOINT_HPP #include #include #include #include #include #include #include namespace graphlab{ struct trace_count{ std::string name; std::string description; bool print_on_destruct; graphlab::atomic count; graphlab::atomic total; unsigned long long minimum; unsigned long long maximum; inline trace_count(std::string name = "", std::string description = "", bool print_on_destruct = true): name(name), description(description), print_on_destruct(print_on_destruct), count(0), total(0), minimum(std::numeric_limits::max()), maximum(0) { } /** * Initializes the tracer with a name, a description * and whether to print on destruction */ inline void initialize(std::string n, std::string desc, bool print_out = true) { name = n; description = desc; print_on_destruct = print_out; } /** * Adds an event time to the trace */ inline void incorporate(unsigned long long val) __attribute__((always_inline)) { count.inc(); total.inc(val); while(1) { unsigned long long m = minimum; if (__likely__(val > m || graphlab::atomic_compare_and_swap(minimum, m, val))) break; } while(1) { unsigned long long m = maximum; if (__likely__(val < m || graphlab::atomic_compare_and_swap(maximum, m, val))) break; } } /** * Adds the counts in a second tracer to the current tracer. 
*/ inline void incorporate(const trace_count &val) __attribute__((always_inline)) { count.inc(val.count.value); total.inc(val.total.value); while(1) { unsigned long long m = minimum; if (__likely__(val.minimum > m || graphlab::atomic_compare_and_swap(minimum, m, val.minimum))) break; } while(1) { unsigned long long m = maximum; if (__likely__(val.maximum < m || graphlab::atomic_compare_and_swap(maximum, m, val.maximum))) break; } } /** * Adds the counts in a second tracer to the current tracer. */ inline trace_count& operator+=(trace_count &val) { incorporate(val); return *this; } /** * Destructor. Will print to cout if initialize() is called * with "true" as the 3rd argument */ ~trace_count(); /** * Prints the tracer counts */ void print(std::ostream& out, unsigned long long tpersec = 0) const; }; } // namespace /** * DECLARE_TRACER(name) * creates a tracing object with a given name. This creates a variable * called "name" which is of type trace_count. and is equivalent to: * * graphlab::trace_count name; * * The primary reason to use this macro instead of just writing * the code above directly, is that the macro is ignored and compiles * to nothing when tracepoints are disabled. * * * INITIALIZE_TRACER(name, desc) * The object with name "name" created by DECLARE_TRACER must be in scope. * This initializes the tracer "name" with a description, and * configures the tracer to print when the tracer "name" is destroyed. * * * INITIALIZE_TRACER_NO_PRINT(name, desc) * The object with name "name" created by DECLARE_TRACER must be in scope. * This initializes the tracer "name" with a description, and * configures the tracer to NOT print when the tracer "name" is destroyed. * * BEGIN_TRACEPOINT(name) * END_TRACEPOINT(name) * The object with name "name" created by DECLARE_TRACER must be in scope. * Times a block of code. Every END_TRACEPOINT must be matched with a * BEGIN_TRACEPOINT within the same scope. Tracepoints are parallel. 
* Example Usage: DECLARE_TRACER(classname_someevent) INITIALIZE_TRACER(classname_someevent, "hello world"); Then later on... BEGIN_TRACEPOINT(classname_someevent) ... END_TRACEPOINT(classname_someevent) * */ #ifdef USE_TRACEPOINT #define DECLARE_TRACER(name) graphlab::trace_count name; #define INITIALIZE_TRACER(name, description) name.initialize(#name, description); #define INITIALIZE_TRACER_NO_PRINT(name, description) name.initialize(#name, description, false); #define BEGIN_TRACEPOINT(name) unsigned long long __ ## name ## _trace_ = rdtsc(); #define END_TRACEPOINT(name) name.incorporate(rdtsc() - __ ## name ## _trace_); #define END_AND_BEGIN_TRACEPOINT(endname, beginname) unsigned long long __ ## beginname ## _trace_ = rdtsc(); \ endname.incorporate(__ ## beginname ## _trace_ - __ ## endname ## _trace_); #define CREATE_ACCUMULATING_TRACEPOINT(name) graphlab::trace_count __ ## name ## _acc_trace_; \ unsigned long long __ ## name ## _acc_trace_elem_; #define BEGIN_ACCUMULATING_TRACEPOINT(name) __ ## name ## _acc_trace_elem_ = rdtsc(); #define END_ACCUMULATING_TRACEPOINT(name) __ ## name ## _acc_trace_.incorporate(rdtsc() - __ ## name ## _acc_trace_elem_); #define END_AND_BEGIN_ACCUMULATING_TRACEPOINT(endname, beginname) __ ## beginname ## _acc_trace_elem_ = rdtsc(); \ __ ## endname ## _acc_trace_.incorporate(__ ## beginname ## _acc_trace_elem_ - __ ## endname ## _acc_trace_elem_) #define STORE_ACCUMULATING_TRACEPOINT(name) name.incorporate(__ ## name ## _acc_trace_); #else #define DECLARE_TRACER(name) #define INITIALIZE_TRACER(name, description) #define INITIALIZE_TRACER_NO_PRINT(name, description) #define BEGIN_TRACEPOINT(name) #define END_TRACEPOINT(name) #define CREATE_ACCUMULATING_TRACEPOINT(name) #define BEGIN_ACCUMULATING_TRACEPOINT(name) #define END_ACCUMULATING_TRACEPOINT(name) #define STORE_ACCUMULATING_TRACEPOINT(name) #define END_AND_BEGIN_ACCUMULATING_TRACEPOINT(endname, beginname) #define END_AND_BEGIN_TRACEPOINT(endname, beginname) #endif #endif 
================================================ FILE: src/graphlab/util/uint128.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_UINT128_HPP #define GRAPHLAB_UINT128_HPP #include #include #include #include namespace graphlab { /** * A 128 bit numeric type. This type is a union of a 16-byte character array (bytes), * and struct of two 64-bit integers (ints.high and ints.low). */ union gl_uint128_t { struct { uint64_t high; uint64_t low; } ints; char bytes[16]; gl_uint128_t() { } /** * Constructs a 128-bit type from a 64-bit value. * It simply clears the "high" 64 bits of the 128-bit integer, and sets * the low 64-bits to the input */ explicit gl_uint128_t(uint64_t val) { ints.high = 0; ints.low = val; } }; /** * Sets all 128bits of the the gl_uint128_t to 'true'. 
* Or the 128-bit integer representation of "-1" */ inline gl_uint128_t fill_128b() { gl_uint128_t i; i.ints.high = (uint64_t)(-1); i.ints.low = (uint64_t)(-1); return i; } /** * Prints the 128-bit integer as hexadecimal */ inline std::ostream& operator<<(std::ostream& out, const gl_uint128_t &val) { static char hexchar[17] = "0123456789abcdef"; for (size_t i = 0;i < 16; ++i) { out << hexchar[(val.bytes[i] >> 4) & 15]; out << hexchar[val.bytes[i] & 15]; } return out; } } SERIALIZABLE_POD(graphlab::gl_uint128_t); #endif ================================================ FILE: src/graphlab/util/union_find.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_UTIL_UNION_FIND_HPP #define GRAPHLAB_UTIL_UNION_FIND_HPP #include #include #include namespace graphlab { // IDType must be an integer type and its maximum // value must be larger than the length of the sequence template class union_find { private: std::vector > setid; bool is_root(IDType i) { return setid[i].first == (IDType)i; } public: union_find() { } void init(IDType s) { setid.resize((size_t)s); for (size_t i = 0; i < setid.size() ;++i) { setid[i].first = (IDType)(i); setid[i].second = 0; } } void merge(IDType i, IDType j) { IDType iroot = find(i); IDType jroot = find(j); if (iroot == jroot) return; else if (setid[iroot].second < setid[jroot].second) { setid[iroot].first = jroot; } else if (setid[iroot].second > setid[jroot].second) { setid[jroot].first = iroot; } else { setid[jroot].first = iroot; // make sure we don't overflow if (setid[iroot].second + 1 > setid[iroot].second) { setid[iroot].second = setid[iroot].second + 1; } } } IDType find(IDType i) { IDType root = i; if (is_root(root)) return root; // get the id of the root element while (!is_root(root)) { root = setid[root].first; } // update the parents and ranks all the way up IDType cur = i; while (!is_root(cur)) { IDType parent = setid[cur].first; setid[cur].first = root; cur = parent; } return setid[i].first; } }; class concurrent_union_find { private: union elem{ struct { uint32_t next; uint32_t rank; } d; uint64_t val; }; std::vector setid; bool is_root(uint32_t i) { return setid[i].d.next == i; } bool updateroot(uint32_t x, uint32_t oldrank, uint32_t y, uint32_t newrank) { elem old; old.d.next = x; old.d.rank = oldrank; elem newval; newval.d.next = y; newval.d.rank = newrank; return atomic_compare_and_swap(setid[x].val, old.val, newval.val); } public: concurrent_union_find() { } void init(uint32_t s) { setid.resize((size_t)s); for (size_t i = 0; i < setid.size() ;++i) { setid[i].d.next = 
(uint32_t)(i); setid[i].d.rank = 0; } } void merge(uint32_t x, uint32_t y) { uint32_t xr, yr; while(1) { x = find(x); y = find(y); if (x == y) return; xr = setid[x].d.rank; yr = setid[y].d.rank; if (xr > yr || (xr == yr && x > y)) { std::swap(x,y); std::swap(xr, yr); } if (updateroot(x, xr, y, xr)) break; } if (xr == yr) { __sync_add_and_fetch(&(setid[y].d.rank), 1); } } uint32_t find(uint32_t x) { if (is_root(x)) return x; uint32_t y = x; // get the id of the root element while (!is_root(x)) { x = setid[x].d.next; } // update the parents and ranks all the way up while (setid[y].d.rank < setid[x].d.rank) { uint32_t t = setid[y].d.next; atomic_compare_and_swap(setid[y].d.next, t, x); y = setid[t].d.next; } return x; } }; } #endif ================================================ FILE: src/graphlab/util/util_includes.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include // #include // #include #include #include #include #include #include ================================================ FILE: src/graphlab/util/web_util.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include namespace graphlab { namespace web_util { std::string url_decode(const std::string& url) { #define HEXTOI(x) (isdigit(x) ? x - '0' : x - 'W') std::string ret_str; for (size_t i = 0; i < url.size(); ++i) { if (url[i] == '%' && (i+1 < url.size() && isxdigit(url[i+1])) && (i+1 < url.size() && isxdigit(url[i+2]))) { const char a = tolower(url[i+1]); const char b = tolower(url[i+2]); const char new_char = ((HEXTOI(a) << 4) | HEXTOI(b)); i += 2; ret_str.push_back(new_char); } else if (url[i] == '+') { ret_str.push_back(' '); } else { ret_str.push_back(url[i]); } } #undef HEXTOI return ret_str; } // end of url decode std::map parse_query(const std::string& query) { std::vector pairs = graphlab::strsplit(query, ",=", true); std::map map; for(size_t i = 0; i+1 < pairs.size(); i+=2) map[url_decode(pairs[i])] = url_decode(pairs[i+1]); return map; } // end of parse url query } // end of namespace web_util }; // end of namespace GraphLab ================================================ FILE: src/graphlab/util/web_util.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_WEB_UTIL_HPP #define GRAPHLAB_WEB_UTIL_HPP #include #include namespace graphlab { namespace web_util { /** * \brief decode a url by converting escape characters */ std::string url_decode(const std::string& url); /** * \brief convert a query string into a map */ std::map parse_query(const std::string& query); } // end of namespace web_util }; // end of namespace GraphLab #endif ================================================ FILE: src/graphlab/version.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_VERSION_HPP #define GRAPHLAB_VERSION_HPP #define GRAPHLAB_VERSION "2_1_0" #define GRAPHLAB_VERSION_MAJOR 2 #define GRAPHLAB_VERSION_MINOR 1 #define GRAPHLAB_VERSION_REVISION 0 #endif ================================================ FILE: src/graphlab/vertex_program/CMakeLists.txt ================================================ project(GraphLab) ================================================ FILE: src/graphlab/vertex_program/context.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_CONTEXT_HPP #define GRAPHLAB_CONTEXT_HPP #include #include #include #include namespace graphlab { /** * \brief The context object mediates the interaction between the * vertex program and the graphlab execution environment and * implements the \ref icontext interface. * * \tparam Engine the engine that is using this context. 
*/ template class context : public icontext { public: // Type members =========================================================== /** The engine that created this context object */ typedef Engine engine_type; /** The parent type */ typedef icontext icontext_type; typedef typename icontext_type::graph_type graph_type; typedef typename icontext_type::vertex_id_type vertex_id_type; typedef typename icontext_type::vertex_type vertex_type; typedef typename icontext_type::message_type message_type; typedef typename icontext_type::gather_type gather_type; private: /** A reference to the engine that created this context */ engine_type& engine; /** A reference to the graph that is being operated on by the engine */ graph_type& graph; public: /** * \brief Construct a context for a particular engine and graph pair. */ context(engine_type& engine, graph_type& graph) : engine(engine), graph(graph) { } size_t num_vertices() const { return graph.num_vertices(); } /** * Get the number of edges in the graph */ size_t num_edges() const { return graph.num_edges(); } // /** // * Get an estimate of the number of update functions executed up // * to this point. // */ // size_t num_updates() const { return engine.num_updates(); } size_t procid() const { return graph.procid(); } size_t num_procs() const { return graph.numprocs(); } std::ostream& cout() const { return graph.dc().cout(); } std::ostream& cerr() const { return graph.dc().cerr(); } /** * Get the elapsed time in seconds */ float elapsed_seconds() const { return engine.elapsed_seconds(); } /** * Return the current interation number (if supported). */ int iteration() const { return engine.iteration(); } /** * Force the engine to stop executing additional update functions. */ void stop() { engine.internal_stop(); } /** * Send a message to a vertex. */ void signal(const vertex_type& vertex, const message_type& message = message_type()) { engine.internal_signal(vertex, message); } /** * Send a message to an arbitrary vertex ID. 
* \warning If sending to neighboring vertices, the \ref signal() * function is more efficientas it permits sender side message combining. */ void signal_vid(vertex_id_type vid, const message_type& message = message_type()) { engine.internal_signal_gvid(vid, message); } /** * Post a change to the cached sum for the vertex */ void post_delta(const vertex_type& vertex, const gather_type& delta) { engine.internal_post_delta(vertex, delta); } /** * Invalidate the cached gather on the vertex. */ virtual void clear_gather_cache(const vertex_type& vertex) { engine.internal_clear_gather_cache(vertex); } }; // end of context } // end of namespace #include #endif ================================================ FILE: src/graphlab/vertex_program/icontext.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_ICONTEXT_HPP #define GRAPHLAB_ICONTEXT_HPP #include #include #include #include #include namespace graphlab { /** * \brief The context object mediates the interaction between the * vertex program and the graphlab execution environment. * * Each of the vertex program (see \ref ivertex_program) methods is * passed a reference to the engine's context. 
namespace graphlab {

  /**
   * \brief The context object mediates the interaction between the
   * vertex program and the graphlab execution environment.
   *
   * Each vertex-program method is passed a reference to the engine's
   * context.  Through it the program can query execution state
   * (vertex/edge counts, process id, elapsed time, iteration number)
   * and send information back to the engine (signal, post_delta,
   * clear_gather_cache).  Every member is virtual with a trivial
   * default so that engines override only what they support.
   *
   * \tparam GraphType   the graph type (typically distributed_graph)
   * \tparam GatherType  the user defined gather type
   * \tparam MessageType the user defined message type
   */
  template<typename GraphType, typename GatherType, typename MessageType>
  class icontext {
  public:
    // Type members ===========================================================
    /// The user graph type (typically distributed_graph).
    typedef GraphType graph_type;
    /// The opaque vertex handle defined by the graph type.
    typedef typename graph_type::vertex_type vertex_type;
    /// The global vertex identifier type.
    typedef typename graph_type::vertex_id_type vertex_id_type;
    /// The message type specified by the user-defined vertex-program.
    typedef MessageType message_type;
    /// The type returned by the gather operation.
    typedef GatherType gather_type;

  public:
    /// \brief icontext destructor.
    virtual ~icontext() { }

    /// \brief Total number of vertices in the entire graph (default: 0).
    virtual size_t num_vertices() const { return 0; }

    /// \brief Total number of edges in the entire graph; each
    /// direction counts as a separate edge (default: 0).
    virtual size_t num_edges() const { return 0; }

    /// \brief Id of this process, between 0 and num_procs().
    /// \warning Each process may have many threads.
    virtual size_t procid() const { return 0; }

    /// \brief Standard output stream that prints only once when
    /// running distributed (maps to std::cout on process 0, empty
    /// streams elsewhere).  This base implementation returns
    /// std::cout.  Useful in the finalize operation of aggregators.
    virtual std::ostream& cout() const { return std::cout; }

    /// \brief Standard error stream that prints only once when
    /// running distributed (maps to std::cerr on process 0, empty
    /// streams elsewhere).  This base implementation returns
    /// std::cerr.  Useful in the finalize operation of aggregators.
    virtual std::ostream& cerr() const { return std::cerr; }

    /// \brief Number of processes in the current execution, e.g. 16
    /// for "mpiexec -n 16 ./pagerank" (default: 0).
    virtual size_t num_procs() const { return 0; }

    /// \brief Elapsed runtime in seconds since start was called.
    virtual float elapsed_seconds() const { return 0.0; }

    /// \brief Current iteration number, or -1 when the engine does
    /// not support iteration counting.
    virtual int iteration() const { return -1; }

    /// \brief Signal the engine to stop executing additional update
    /// functions.
    /// \warning The engine stops *eventually*; further update
    /// functions may run first (e.g. the synchronous engine finishes
    /// the current super-step before terminating).
    virtual void stop() { }

    /// \brief Signal a vertex with an optional message.  Messages
    /// destined to the same vertex are merged (operator+=); when no
    /// message is given the default message_type() is used.
    /// \param vertex  the vertex to send the message to
    /// \param message the message to send, defaults to message_type()
    virtual void signal(const vertex_type& vertex,
                        const message_type& message = message_type()) { }

    /// \brief Send a message to a vertex by global id.
    /// \warning Slower than signal() because the current machine may
    /// not know the location of the vertex ID; prefer signal() when
    /// possible.
    /// \param gvid    the vertex id of the vertex to signal
    /// \param message the message to send, defaults to message_type()
    virtual void signal_vid(vertex_id_type gvid,
                            const message_type& message = message_type()) { }

    /// \brief Atomically add a delta to the cached gather sum of a
    /// vertex (meaningful only on engines with gather caching; the
    /// vertex program is responsible for keeping neighbor caches
    /// consistent).
    /// \param vertex the vertex whose cache to update
    /// \param delta  the change to *add* to the current cache
    virtual void post_delta(const vertex_type& vertex,
                            const gather_type& delta) { }

    /// \brief Invalidate the cached gather on a vertex, forcing a
    /// complete invocation of the subsequent gather.
    /// \param vertex the vertex whose cache to clear
    virtual void clear_gather_cache(const vertex_type& vertex) { }
  }; // end of icontext

} // end of namespace graphlab
All rights * reserved. * */ #ifndef GRAPHLAB_IVERTEX_PROGRAM_HPP #define GRAPHLAB_IVERTEX_PROGRAM_HPP #include #include #include #include #include #include #include #if defined(__cplusplus) && __cplusplus >= 201103L // for whatever reason boost concept is broken under C++11. // Temporary workaround. TOFIX #undef BOOST_CONCEPT_ASSERT #define BOOST_CONCEPT_ASSERT(unused) #endif namespace graphlab { /** * \brief The ivertex_program class defines the vertex program * interface that all vertex programs should extend and implement. * The vertex-program is used to encode the user-define computation * in a GraphLab program. * * Overview * ================== * * A vertex program represents the primary user defined computation * in GraphLab. A unique instance of the vertex program is run on * each vertex in the graph and can interact with neighboring vertex * programs through the gather and scatter functions as well as by * signaling neighboring vertex-programs. Conceptually the * vertex-program is a class which represents the parts of an * update-function in the original formulation of the GraphLab * abstraction. 
Moreover many graph-structured programs can be * written in the following pattern: * * \code * graphlab::update_function(Vertex center, Neighborhood nbrs) { * // nbrs represents the state of neighboring vertices and edges * * // Gather Phase: * sum = EMPTY; * for(edge in nbrs.in_edges()) { * // The sum is a general commutative associative operation * if(sum == EMPTY) sum = gather_function(center, edge, edge.neighbor()); * else sum += gather_function(center, edge, edge.neighbor()); * } * * // Apply Phase: * center = apply_function(center, sum); * * // Scatter Phase: * for(edge in nbrs.out_edges()) { * edge = scatter_function(center, edge, edge.neighbor()); * if(condition is met) trigger_neighbor(); * } * * } * \endcode * * Vertex programs express computation by implementing what we call * the *Gather-Apply-Scatter (GAS)* model which decomposes the * vertex program into a parallel gather phase, followed by an * atomic apply phase, and finally a parallel scatter phase. This * decomposition allows us to execute a single vertex program on * several machines simultaneously and move computation to the data. * * We therefore decompose the update function logic into member * functions of the vertex-program class that are invoked in the * following manner: * * \code * For the center vertex vtx: * vprog.init(ctx, vtx, msg); * // Gather Phase: * vprog::gather_type sum = EMPTY; * ParallelFor(adjacent edges in direction vprog.gather_edges(ctx, vtx) ) * if(sum == EMPTY) sum = vprog.gather(ctx, vtx, edge); * else sum += vprog.gather(ctx, vtx, edge); * // Apply Phase * vprog.apply(ctx, vtx, sum); * // Scatter Phase * ParallelFor(adjacent edges in direction vprog.scatter_edges(ctx, vtx) ) * vprog.scatter(ctx, vtx, edge); * // Vertex program is destroyed * vprog = vertex_program(); * \endcode * * All user define vertex programs must extend the ivertex_program * interface and implement the ivertex_program::apply function. 
* Most vertex programs will also implement the * ivertex_program::gather and ivertex_program::scatter functions. * * The state of a vertex program *does not* persist between * invocations of \ref ivertex_program::init. Moreover prior to * each call to init the vertex program's previous state is * cleared. Therefore any persistent state must be saved into the * vertex data. * * The vertex program depends on several key types which are * template arguments to ivertex_program interface. * * \li graph_type: the type of graph used to store the data for this * vertex program. This currently always the distributed_graph. * * \li gather_type: the type used in the gather phase and must * implement the operator+= function. * * \li message_type: The type used for signaling and is typically * empty. However if a message type is desired it must implement * the operator+= to allow message merging across the network. In * addition the message type may also implement the priority() * function which returns a double assigning a priority to the * reception of the message (used by the asynchronous engines). We * provide a basic set of simple prioritized messages in * \ref graphlab::signals. * * All user-defined types including the vertex data, edge data, * vertex-program, gather type, and message type must be * serializable (see \ref sec_serializable) and default * constructible to enable movement between machines. * * Advanced Features * ====================== * * While the basic Gather-Apply-Scatter approach to graph structure * computation can express a wide range of algorithms there are some * situation where additional features could either simplify the * design or provide additional efficiency. * * * Messaging * ---------------------- * * Vertex-programs can trigger adjacent vertex programs by sending a * signal which can contain a message to neighbor vertices. By * default the message type is empty however it is possible for the * user to define a message type. 
For example the following * message_type could be used to implement pagerank: * * \code * struct pagerank_message : public graphlab::IS_POD_TYPE { * double value; * double priority() const { return std::fabs(value); } * message_type& operator+=(const message_type& other) { * value += other.value; * return *this; * } * }; * \endcode * * Unlike other messaging abstractions, GraphLab always _merges_ * messages destined to the same vertex. This allows the GraphLab * engines to minimize network communication and more evenly balance * computation. Messages are combined using the operator+= * function. * * As mentioned earlier some engines may prioritize the _reception_ * of messages. Messages can optionally (it is not required) * provide a priority function which is used to prioritize message * reception. The engine then attempts to prioritize the reception * of higher priority messages first. * * The message is received in the \ref ivertex_program::init * function. The single message passed into * \ref ivertex_program::init represents the sum of all messages * destined to that vertex since the vertex-program was last invoked. 
* * The GraphLab messaging framework allows us to write * Pregel-like vertex-programs of the form: * * \code * typedef graphlab::empty gather_type; * class pregel_pagerank : * public ivertex_program, * public graphlab::IS_POD_TYPE { * * // Store a local copy of the message data * double message_value; * * // Receive the inbound message (sum of messages) * void init(icontext_type& context, const vertex_type& vertex, * const message_type& msg) { * message_value = msg.value; * } * * // Skip the gather phase * edge_dir_type gather_edges(icontext_type& context, * const vertex_type& vertex) const { * return graphlab::NO_EDGES; * } * * // Update the pagerank using the message * void apply(icontext_type& context, vertex_type& vertex, * const gather_type& total) { * vertex.data() += message_value; * } * * // Scatter along out edges * edge_dir_type scatter_edges(icontext_type& context, * const vertex_type& vertex) const { * return OUT_EDGES; * } * * // Compute new messages encoding the change in the pagerank of * // adjacent vertices. * void scatter(icontext_type& context, const vertex_type& vertex, * edge_type& edge) const { * pagerank_message msg; * msg.value = message_value * (1 - RESET_PROBABILITY); * context.signal(edge.target(), msg); * } * }; * \endcode * * Notice that the gather phase is skipped and instead the gather * computation is accomplished using the messages. However unlike * Pregel the scatter function which computes and sends the new * message is actually run on the machine that is receiving the * message. * * The message abstraction is surprisingly powerful and can often * express computation that can be written using the Gather * operation. However, the message combination is done outside of * the consistency model and so can lead to more confusing code.
* * Gather Caching * --------------------- * * In many applications the gather computation can be costly and * high-degree vertices will be signaled often even only a small * fraction of its neighbors values have changed. In this case * running the gather function on all neighbors can be wasteful. To * address this important issue the GraphLab engines expose a gather * caching mechanism. However to take advantage of the gather * caching the vertex-program must notify the engine when a cache is * no longer valid and can even correct the cache to ensure that it * remains valid. * * \todo finish documenting gather caching * * * */ template class ivertex_program { public: // User defined type members ============================================== /** * \brief The user defined vertex data associated with each vertex * in the graph (see \ref distributed_graph::vertex_data_type). * * The vertex data is the data associated with each vertex in the * graph. Unlike the vertex-program the vertex data of adjacent * vertices is visible to other vertex programs during the gather * and scatter phases and persists between executions of the * vertex-program. * * The vertex data type must be serializable * (see \ref sec_serializable) */ typedef typename Graph::vertex_data_type vertex_data_type; /** * \cond GRAPHLAB_INTERNAL * * \brief GraphLab Requires that the vertex data type be Serializable. See * \ref sec_serializable for details. */ BOOST_CONCEPT_ASSERT((graphlab::Serializable)); /// \endcond /** * \brief The user defined edge data associated with each edge in * the graph. * * The edge data type must be serializable * (see \ref sec_serializable) * */ typedef typename Graph::edge_data_type edge_data_type; /** * \cond GRAPHLAB_INTERNAL * * \brief GraphLab Requires that the edge data type be Serializable. See * \ref sec_serializable for details. 
*/ BOOST_CONCEPT_ASSERT((graphlab::Serializable)); /// \endcond /** * \brief The user defined gather type is used to accumulate the * results of the gather function during the gather phase and must * implement the operator += operation. * * The gather type plays the following role in the vertex program: * * \code * gather_type sum = EMPTY; * for(edges in vprog.gather_edges()) { * if(sum == EMPTY) sum = vprog.gather(...); * else sum += vprog.gather( ... ); * } * vprog.apply(..., sum); * \endcode * * In addition to implementing the operator+= operation the gather * type must also be serializable (see \ref sec_serializable). */ typedef GatherType gather_type; /** * \cond GRAPHLAB_INTERNAL * \brief GraphLab Requires that the gather type be default * constructible. * * \code * class gather_type { * public: * gather_type() { } * }; * \endcode */ BOOST_CONCEPT_ASSERT((boost::DefaultConstructible)); /// \endcond /** * \cond GRAPHLAB_INTERNAL * * \brief GraphLab Requires that gather type be Serializable. See * \ref sec_serializable for detials */ BOOST_CONCEPT_ASSERT((graphlab::Serializable)); /// \endcond /** * \cond GRAPHLAB_INTERNAL * * \brief GraphLab Requires that gather type support operator+=. */ BOOST_CONCEPT_ASSERT((graphlab::OpPlusEq)); /// \endcond /** * \cond GRAPHLAB_INTERNAL * * \brief GraphLab Requires that the gather type be serializable * * \code * class gather_type { * public: * gather_type() { } * }; * \endcode */ BOOST_CONCEPT_ASSERT((boost::DefaultConstructible)); /// \endcond /** * The message type which must be provided by the vertex_program */ typedef MessageType message_type; /** * \cond GRAPHLAB_INTERNAL * * \brief GraphLab requires that the message type be default * constructible. * * \code * class message_type { * public: * message_type() { } * }; * \endcode * */ BOOST_CONCEPT_ASSERT((boost::DefaultConstructible)); /// \endcond /** * \cond GRAPHLAB_INTERNAL * * \brief GraphLab requires that the message type be Serializable. 
* See \ref sec_serializable for detials */ BOOST_CONCEPT_ASSERT((graphlab::Serializable)); /// \endcond /** * \cond GRAPHLAB_INTERNAL * * \brief GraphLab requires that message type support operator+=. */ BOOST_CONCEPT_ASSERT((graphlab::OpPlusEq)); /// \endcond // Graph specific type members ============================================ /** * \brief The graph type associative with this vertex program. * * The graph type is specified as a template argument and will * usually be \ref distributed_graph. */ typedef Graph graph_type; /** * \brief The unique integer id used to reference vertices in the graph. * * See \ref graphlab::vertex_id_type for details. */ typedef typename graph_type::vertex_id_type vertex_id_type; /** * \brief The opaque vertex object type used to get vertex * information. * * The vertex type is defined by the graph. * See \ref distributed_graph::vertex_type for details. */ typedef typename graph_type::vertex_type vertex_type; /** * \brief The opaque edge_object type used to access edge * information. * * The edge type is defined by the graph. * See \ref distributed_graph::edge_type for details. */ typedef typename graph_type::edge_type edge_type; /** * \brief The type used to define the direction of edges used in * gather and scatter. * * Possible values include: * * \li graphlab::NO_EDGES : Do not process any edges * * \li graphlab::IN_EDGES : Process only inbound edges to this * vertex * * \li graphlab::OUT_EDGES : Process only outbound edges to this * vertex * * \li graphlab::ALL_EDGES : Process both inbound and outbound * edges on this vertes. * * See \ref graphlab::edge_dir_type for details. */ typedef graphlab::edge_dir_type edge_dir_type; // Additional Types ======================================================= /** * \brief The context type is used by the vertex program to * communicate with the engine. 
* * The context and provides facilities for signaling adjacent * vertices (sending messages), interacting with the GraphLab * gather cache (posting deltas), and accessing engine state. * */ typedef icontext icontext_type; // Functions ============================================================== /** * \brief Standard virtual destructor for an abstract class. */ virtual ~ivertex_program() { } /** * \brief This called by the engine to receive a message to this * vertex program. The vertex program can use this to initialize * any state before entering the gather phase. The init function * is invoked _once_ per execution of the vertex program. * * If the vertex program does not implement this function then the * default implementation (NOP) is used. * * \param [in,out] context The context is used to interact with the engine * * \param [in] vertex The vertex on which this vertex-program is * running. Note that the vertex is constant and its value should * not be modified within the init function. If there is some * message state that is needed then it must be saved to the * vertex-program and not the vertex data. * * \param [in] message The sum of all the signal calls to this * vertex since it was last run. */ virtual void init(icontext_type& context, const vertex_type& vertex, const message_type& msg) { /** NOP */ } /** * \brief Returns the set of edges on which to run the gather * function. The default edge direction is in edges. * * The gather_edges function is invoked after the init function * has completed. * * \warning The gather_edges function may be invoked multiple * times for the same execution of the vertex-program and should * return the same value. In addition it cannot modify the * vertex-programs state or the vertex data. * * Possible return values include: * * \li graphlab::NO_EDGES : The gather phase is completely skipped * potentially reducing network communication. 
* * \li graphlab::IN_EDGES : The gather function is only run on * inbound edges to this vertex. * * \li graphlab::OUT_EDGES : The gather function is only run on * outbound edges to this vertex. * * \li graphlab::ALL_EDGES : The gather function is run on both * inbound and outbound edges to this vertes. * * \param [in,out] context The context is used to interact with * the engine * * \param [in] vertex The vertex on which this vertex-program is * running. Note that the vertex is constant and its value should * not be modified. * * \return One of graphlab::NO_EDGES, graphlab::IN_EDGES, * graphlab::OUT_EDGES, or graphlab::ALL_EDGES. * */ virtual edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return IN_EDGES; } /** * \brief The gather function is called on all the * \ref ivertex_program::gather_edges in parallel and returns the * \ref gather_type which are added to compute the final output of * the gather phase. * * The gather function is the core computational element of the * Gather phase and is responsible for collecting the information * about the state of adjacent vertices and edges. * * \warning The gather function is executed in parallel on * multiple machines and therefore cannot modify the * vertex-program's state or the vertex data. * * A default implementation of the gather function is provided * which will fail if invoked. * * \param [in,out] context The context is used to interact with * the engine * * \param [in] vertex The vertex on which this vertex-program is * running. Note that the vertex is constant and its value should * not be modified. * * \param [in,out] edge The adjacent edge to be processed. The * edge is not constant and therefore the edge data can be * modified. * * \return the result of the gather computation which will be * "summed" to produce the input to the apply operation. The * behavior of the "sum" is defined by the \ref gather_type. 
* */ virtual gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { logstream(LOG_FATAL) << "Gather not implemented!" << std::endl; return gather_type(); }; /** * \brief The apply function is called once the gather phase has * completed and must be implemented by all vertex programs. * * The apply function is responsible for modifying the vertex data * and is run only once per vertex per execution of a vertex * program. In addition the apply function can modify the state * of the vertex program. * * If a vertex has no neighbors than the apply function is called * passing the default value for the gather_type. * * \param [in,out] context The context is used to interact with * the engine * * \param [in,out] vertex The vertex on which this vertex-program is * running. * * \param [in] total The result of the gather phase. If a vertex * has no neighbors then the total is the default value (i.e., * gather_type()) of the gather type. * */ virtual void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) = 0; /** * \brief Returns the set of edges on which to run the scatter * function. The default edge direction is out edges. * * The scatter_edges function is invoked after the apply function * has completed. * * \warning The scatter_edges function may be invoked multiple * times for the same execution of the vertex-program and should * return the same value. In addition it cannot modify the * vertex-programs state or the vertex data. * * Possible return values include: * * \li graphlab::NO_EDGES : The scatter phase is completely * skipped potentially reducing network communication. * * \li graphlab::IN_EDGE : The scatter function is only run on * inbound edges to this vertex. * * \li graphlab::OUT_EDGES : The scatter function is only run on * outbound edges to this vertex. * * \li graphlab::ALL_EDGES : The scatter function is run on both * inbound and outbound edges to this vertes. 
* * \param [in,out] context The context is used to interact with * the engine * * \param [in] vertex The vertex on which this vertex-program is * running. Note that the vertex is constant and its value should * not be modified. * * \return One of graphlab::NO_EDGES, graphlab::IN_EDGES, * graphlab::OUT_EDGES, or graphlab::ALL_EDGES. * */ virtual edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return OUT_EDGES; } /** * \brief Scatter is called on all scatter_edges() in parallel * after the apply function has completed and is typically * responsible for updating edge data, signaling (messaging) * adjacent vertices, and updating the gather cache state when * caching is enabled. * * The scatter function is almost identical to the gather function * except that nothing is returned. * * \warning The scatter function is executed in parallel on * multiple machines and therefore cannot modify the * vertex-program's state or the vertex data. * * A default implementation of the gather function is provided * which will fail if invoked. * * \param [in,out] context The context is used to interact with * the engine * * \param [in] vertex The vertex on which this vertex-program is * running. Note that the vertex is constant and its value should * not be modified. * * \param [in,out] edge The adjacent edge to be processed. The * edge is not constant and therefore the edge data can be * modified. * */ virtual void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { logstream(LOG_FATAL) << "Scatter not implemented!" << std::endl; }; /** * \internal * Used to signal the start of a local gather. * Called on each machine which is doing a gather operation. * Semantics are that, a complete gather involves * * \code * On each machine with edges adjacent to vertex being updated: * vprogram.pre_local_gather(g) // passed by reference * foreach edge adjacent to vertex: * if ( ... first gather ... 
) g = vprogram.gather(edge) * else g += vprogram.gather(edge) * end * vprogram.post_local_gather(g) // passed by reference * \endcode */ virtual void pre_local_gather(gather_type&) const { } /** * \internal * Used to signal the end of a local gather. * Called on each machine which is doing a gather operation. * Semantics are that, a complete gather involves * * \code * On each machine with edges adjacent to vertex being updated: * vprogram.pre_local_gather(g) // passed by reference * foreach edge adjacent to vertex: * if ( ... first gather ... ) g = vprogram.gather(edge) * else g += vprogram.gather(edge) * end * vprogram.post_local_gather(g) // passed by reference * \endcode */ virtual void post_local_gather(gather_type&) const { } }; // end of ivertex_program }; //end of namespace graphlab #include #endif ================================================ FILE: src/graphlab/vertex_program/messages.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_MESSAGES_HPP #define GRAPHLAB_MESSAGES_HPP #include namespace graphlab { namespace messages { /** * The priority of two messages is the sum */ struct sum_priority : public graphlab::IS_POD_TYPE { double value; sum_priority(const double value = 0) : value(value) { } double priority() const { return value; } sum_priority& operator+=(const sum_priority& other) { value += other.value; return *this; } }; // end of sum_priority message /** * The priority of two messages is the max */ struct max_priority : public graphlab::IS_POD_TYPE { double value; max_priority(const double value = 0) : value(value) { } double priority() const { return value; } max_priority& operator+=(const max_priority& other) { value = std::max(value, other.value); return *this; } }; // end of max_priority message }; // end of messages namespace }; // end of graphlab namespace #endif ================================================ FILE: src/graphlab/vertex_program/op_plus_eq_concept.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_OP_PLUS_EQ_CONCEPT #define GRAPHLAB_OP_PLUS_EQ_CONCEPT #include #include #include #include #include namespace graphlab { /** * \brief Concept checks if a type T supports operator+= * * This is a concept checking class for boost::concept and can be * used to enforce that a type T is "additive." In particular many * types in GraphLab (e.g., messages, gather_type, as well as * aggregation types) must support operator+=. To achieve this the * class should implement: * * \code * class gather_type { * int member1; * public: * gather_type& operator+=(const gather_type& other) { * member1 += other.member1; * return *this; * } // end of operator+= * }; * \endcode * * \tparam T The type to test for additivity */ template class OpPlusEq : boost::Assignable, public boost::DefaultConstructible { public: BOOST_CONCEPT_USAGE(OpPlusEq) { T t1 = T(); const T t2 = T(); // A compiler error on these lines implies that your type does // not support operator+= when this is required (e.g., // gather_type or aggregator types) t1 += t2; } }; } // namespace graphlab #endif ================================================ FILE: src/graphlab/vertex_program/vertex_program_includes.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include ================================================ FILE: src/graphlab/warp.hpp ================================================ #include #include #include #include #include ================================================ FILE: src/graphlab/zookeeper/key_value.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include #include #include extern "C" { #include } namespace graphlab{ namespace zookeeper { key_value::key_value(std::vector zkhosts, std::string _prefix, std::string serveridentifier): prefix(_prefix), next_callback_id(0), closing(false) { serveridentifier = serveridentifier; // construct hosts list std::string hosts = boost::algorithm::join(zkhosts, ","); prefix = normalize_path(prefix); if (prefix[0] != '/') prefix = "/" + prefix; // we need to block the watcher from running until everything is ready handle = zookeeper_init(hosts.c_str(), watcher, 10000, NULL, (void*)this, 0); assert(handle != NULL); // create the prefix if it does not already exist if (prefix != "/") create_dir(handle, prefix.substr(0, prefix.length() - 1), "zk_key_value"); if (prefix != "/") { masters_path = prefix + "masters/"; values_path = prefix + "values/"; } else { masters_path = "/masters/"; values_path = "/values/"; } create_dir(handle, masters_path.substr(0, masters_path.length() - 1), "zk_key_value"); create_dir(handle, values_path.substr(0, values_path.length() - 1), "zk_key_value"); datalock.lock(); std::vector unused1, unused2, unused3; get_all_keys_locked(unused1, unused2, unused3); datalock.unlock(); } key_value::~key_value() { if (handle == NULL) return; datalock.lock(); closing = true; datalock.unlock(); // cleanup std::set::const_iterator iter = my_values.begin(); while (iter != my_values.end()) { int version = data[*iter].remote_version; if (version >= 0) { std::string value_node = get_sequence_node_path(values_path + (*iter) + "-", version); delete_node(handle, value_node, "zk_key_value cleanup"); } std::string master_node = masters_path + (*iter); delete_node(handle, master_node, "zk_key_value cleanup"); ++iter; } /* should not try to delete it. It will mess up any watches. 
delete_dir(handle, masters_path.substr(0, masters_path.length() - 1), "zk_key_value cleanup"); delete_dir(handle, values_path.substr(0, values_path.length() - 1), "zk_key_value cleanup"); if (prefix != "/") delete_dir(handle, prefix.substr(0, prefix.length() - 1), "zk_key_value cleanup"); */ zookeeper_close(handle); } /** Inserts a value to the key value store. Returns true on success. * False on failure (indicating the key already exists) */ bool key_value::insert(const std::string& key, const std::string& value) { if (key.length() == 0) return false; if (my_values.count(key)) return modify(key, value); // ok try to create the master node int ret = create_ephemeral_node(handle, masters_path + key, serveridentifier, "zk_key_value insert"); if (ret == ZNODEEXISTS) return false; else { // ok we own this key my_values.insert(key); return modify(key, value); } } bool key_value::modify(const std::string& key, const std::string& value) { if (key.length() == 0) return false; if (my_values.count(key) == 0) return false; datalock.lock(); // add a - to the end std::pair ret = create_ephemeral_sequence_node(handle, values_path + key + "-", value, "zk_key_value modify"); assert(ret.first == ZOK); // update the cache lazy_value& val = data[key]; val.has_value = true; int prev_remote_version = val.remote_version; val.stored_version = ret.second; val.value = value; // try to delete the previous remote version node if (prev_remote_version >= 0) { std::string old_node_path = get_sequence_node_path(values_path + key + "-", prev_remote_version); delete_node(handle, old_node_path, "zk_key_value modify-cleanup"); } datalock.unlock(); return true; } bool key_value::erase(const std::string& key) { if (key.length() == 0) return false; if (my_values.count(key) == 0) return false; datalock.lock(); lazy_value& val = data[key]; // find the current version int cur_remote_version = std::max(val.stored_version, val.remote_version); // try to delete it if (cur_remote_version >= 0) { std::string 
old_node_path = get_sequence_node_path(values_path + key + "-", cur_remote_version); delete_node(handle, old_node_path, "zk_key_value erase-value"); } std::string master_node = masters_path + key; delete_node(handle, master_node, "zk_key_value erase-master"); // don't fully remove it from the data map yet // let the trigger take care of it my_values.erase(my_values.find(key)); datalock.unlock(); return true; } bool key_value::get_all_keys_locked( std::vector& out_newkeys, std::vector& out_deletedkeys, std::vector& out_modifiedkeys) { struct String_vector children; children.count = 0; children.data = NULL; // get a list of all the keys and set the watch std::string values_node = values_path.substr(0, values_path.length() - 1); std::string master_node = masters_path.substr(0, masters_path.length() - 1); int stat = zoo_get_children(handle, values_node.c_str(), 1, &children); if (stat == ZCLOSING) return false; if (stat != 0) { print_stat(stat, "zk_key_value get_all_keys values", values_path); return false; } struct String_vector masters; stat = zoo_get_children(handle, master_node.c_str(), 1, &masters); if (stat == ZCLOSING) return false; if (stat != 0) { print_stat(stat, "zk_key_value get_all_keys masters", masters_path); free_String_vector(&children); return false; } std::vector masterkeys = String_vector_to_vector(&masters); std::vector allkeys = String_vector_to_vector(&children); /* for (size_t i = 0;i < masterkeys.size(); ++i) { std::cout << "\t" << masterkeys[i] << "\n"; } for (size_t i = 0;i < allkeys.size(); ++i) { std::cout << "\t" << allkeys[i] << "\n"; }*/ free_String_vector(&children); free_String_vector(&masters); fill_data_locked(allkeys, masterkeys, out_newkeys, out_deletedkeys, out_modifiedkeys); return true; } std::pair key_value::get(const std::string& key) { datalock.lock(); // search for the key in the map std::map::const_iterator iter = data.find(key); if (iter == data.end()) { datalock.unlock(); return std::pair(false, ""); } // see if we have a 
cached version if (iter->second.has_value) { // yup. we have a cached copy. return that std::pair value(true, iter->second.value); datalock.unlock(); return value; } // otherwise, we need to get the data. // figure out the node we need to query int remote_version = iter->second.remote_version; std::string node = get_sequence_node_path(values_path + key + "-", remote_version); datalock.unlock(); // ok. try to query the node //std::cout << "Getting value for " << node << "\n"; std::pair value = get_node_value(handle, node, "zk_key_value get"); // if successful, return that if (value.first) { // cache the value datalock.lock(); std::map::iterator iter = data.begin(); if (iter != data.end() && iter->second.remote_version == remote_version) { iter->second.has_value = true; iter->second.stored_version = remote_version; iter->second.value = value.second; } datalock.unlock(); } // otherwise... the node is missing. The watch should delete it eventually return value; } void key_value::fill_data_locked(const std::vector& keys, const std::vector& masterkeys, std::vector& out_newkeys, std::vector& out_deletedkeys, std::vector& out_modifiedkeys) { std::set masterkeyset; for (size_t i = 0;i < masterkeys.size(); ++i) { masterkeyset.insert(masterkeys[i]); } std::map key_and_version; for (size_t i = 0;i < keys.size(); ++i) { // this must be a sequence node! assert(keys[i].length() > 10); // where the sequence number is expected to start size_t num_start = keys[i].length() - 10; size_t key_length = num_start - 1; // Ex: abc-1234567890 // length = 14 // num_start = 4 // key_length = 3 // // some sanity checks. 
The format must be [key]-%10d // check for the dash ('-') assert(num_start > 0); assert(keys[i][num_start - 1] == '-'); int version = atoi(keys[i].c_str() + num_start); std::string keyname = keys[i].substr(0, key_length); if (masterkeyset.count(keyname)) { key_and_version[keyname] = std::max(version, key_and_version[keyname]); } } // now scan against the actual data and compute a diff // first search for deleted keys // scan the data map against the key_and_version map { std::map::const_iterator iter = data.begin(); while (iter != data.end()) { if (key_and_version.count(iter->first) == 0) { out_deletedkeys.push_back(iter->first); } ++iter; } } // now actually delete it from the data map for (size_t i = 0;i < out_deletedkeys.size(); ++i) { data.erase(data.find(out_deletedkeys[i])); } // ok. now loop through the key_and_version map and handle new and // modified keys { std::map::const_iterator iter = key_and_version.begin(); while (iter != key_and_version.end()) { std::map::iterator data_iter = data.find(iter->first); if (data_iter == data.end()) { // key not found. this is a new key out_newkeys.push_back(iter->first); data[iter->first].remote_version = iter->second; data[iter->first].has_value = (data[iter->first].stored_version == data[iter->first].remote_version); } else { // key found. 
this is an existing key // if the remote version changed, it was modified if (data_iter->second.remote_version == -1) { out_newkeys.push_back(iter->first); } else if (data_iter->second.remote_version < iter->second) { out_modifiedkeys.push_back(iter->first); } // invalidate the local value data_iter->second.remote_version = std::max(data_iter->second.remote_version, iter->second); data[iter->first].has_value = (data[iter->first].stored_version == data[iter->first].remote_version); } ++iter; } } } // ------------- watch implementation --------------- int key_value::add_callback(callback_type fn) { datalock.lock(); size_t cur_callback_id = next_callback_id; callbacks[cur_callback_id] = fn; ++next_callback_id; datalock.unlock(); return cur_callback_id; } bool key_value::remove_callback(int fnid) { bool ret = false; datalock.lock(); std::map::iterator iter = callbacks.find(fnid); if (iter != callbacks.end()) { ret = true; callbacks.erase(iter); } datalock.unlock(); return ret; } void key_value::watcher(zhandle_t *zh, int type, int state, const char *path, void *watcherCtx) { key_value* slist = reinterpret_cast(watcherCtx); if (type == ZOO_CHILD_EVENT) { slist->datalock.lock(); if (!slist->closing) { std::vector newkeys, deletedkeys, modifiedkeys; bool ret = slist->get_all_keys_locked(newkeys, deletedkeys, modifiedkeys); slist->datalock.unlock(); if (ret && !slist->callbacks.empty()) { std::map::iterator iter = slist->callbacks.begin(); while (iter != slist->callbacks.end()) { iter->second(slist, newkeys, deletedkeys, modifiedkeys); ++iter; } } } else { slist->datalock.unlock(); } } } } // namespace zookeeper } // namespace graphlab ================================================ FILE: src/graphlab/zookeeper/key_value.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef ZOOKEEPER_KEY_VALUE_HPP #define ZOOKEEPER_KEY_VALUE_HPP #include #include #include #include #include #include extern "C" { #include } namespace graphlab{ namespace zookeeper { /** * A simple zookeeper service to maintain a key value store * The service provides the ability to watch for changes * through the use of callbacks. * Keys are "owned" by their creators, and duplicate keys * are not permitted. Owners can change the values of their owned keys. * Keys are destroyed when their owners die. * * The natural implementation will be to create a node for each key * and have the node contain the actual value of the key. However, it is very * difficult to watch for changes here because if there are a large number of * keys, we have to set a data watch on each key, and zookeeper does not like * it if you make too many watches. * * The solution: * For each key * - An ephemeral masters/[key]" node is created. * This is used to identify the machine currently owning the key, and make * sure that there can only be one owner for each key. * - A SEQUENCE EPHEMERAL node with the name "values/[key]-%10d" is created * whenever the value of the key changes. The contents of the node * are the contents of the key. * - Now a single watch on the entire values directory is sufficient to * identify any data changes. 
*/ class key_value { public: /// Joins a zookeeper cluster. /// Zookeeper nodes will be created in the prefix "prefix". key_value(std::vector zkhosts, std::string prefix, std::string serveridentifier); /// destructor ~key_value(); /** Inserts a value to the key value store. Returns true on success. * False on failure (indicating the key already exists) */ bool insert(const std::string& key, const std::string& value); /** Modifies the value in the key value store. Returns true on success. * False on failure. This instance must own the key (created the key) * to modify its value. */ bool modify(const std::string& key, const std::string& value); /** Removed a key in the key value store. Returns true on success. * False on failure. This instance must own the key (created the key) * to delete the key. */ bool erase(const std::string& key); /// Gets a value of a key. First element of the pair is if the key was found std::pair get(const std::string& key); typedef boost::function& out_newkeys, const std::vector& out_deletedkeys, const std::vector& out_modifiedkeys) > callback_type; /** Adds a callback which will be triggered when any key/value * changes. The callback arguments will be the key_value object, * and the new complete key-value mapping. * Calling this function will a NULL argument deletes * the callback. Note that the callback may be triggered in a different thread. * * Returns the id of the callback. Calling remove_callback with the id * disables the callback. */ int add_callback(callback_type fn); /** Removes a callback identified by an ID. 
Returns true on success, * false on failure */ bool remove_callback(int fnid); private: std::string serveridentifier; std::string prefix; std::string masters_path; std::string values_path; zhandle_t* handle; recursive_mutex datalock; std::map callbacks; int next_callback_id; bool closing; // a list of all the values I created std::set my_values; struct lazy_value { bool has_value; int stored_version; int remote_version; std::string value; lazy_value():has_value(false), stored_version(-1), remote_version(-1) {} lazy_value(const lazy_value& lv): has_value(lv.has_value), stored_version(lv.stored_version), remote_version(lv.remote_version), value(lv.value) {} }; std::map data; bool get_all_keys_locked(std::vector& out_newkeys, std::vector& out_deletedkeys, std::vector& out_modifiedkeys); void fill_data_locked(const std::vector& keys, const std::vector& masterkeys, std::vector& out_newkeys, std::vector& out_deletedkeys, std::vector& out_modifiedkeys); static void watcher(zhandle_t *zh, int type, int state, const char *path, void *watcherCtx); }; } // namespace zookeeper } // namespace graphlab #endif ================================================ FILE: src/graphlab/zookeeper/server_list.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include extern "C" { #include } namespace graphlab{ namespace zookeeper { server_list::server_list(std::vector zkhosts, std::string _prefix, std::string _serveridentifier) : prefix(_prefix), serveridentifier(_serveridentifier), callback(NULL) { // construct hosts list std::string hosts = boost::algorithm::join(zkhosts, ","); prefix = normalize_path(prefix); if (prefix[0] != '/') prefix = "/" + prefix; handle = zookeeper_init(hosts.c_str(), watcher, 10000, NULL, (void*)this, 0); // create the prefix if it does not already exist if (prefix != "/") create_dir(handle, prefix.substr(0, prefix.length() - 1), "zk_server_list"); assert(handle != NULL); } server_list::~server_list() { if (handle != NULL) zookeeper_close(handle); } std::vector server_list::get_all_servers(std::string name_space) { boost::algorithm::trim(name_space); assert(name_space.length() > 0); struct String_vector children; children.count = 0; children.data = NULL; std::vector ret; // effective path is prefix + name_space std::string path = prefix + name_space; int stat = zoo_get_children(handle, path.c_str(), 0, &children); // if there are no children quit if (stat == ZNONODE) return ret; ret = String_vector_to_vector(&children); free_String_vector(&children); return ret; } /// Joins a namespace void server_list::join(std::string name_space) { boost::algorithm::trim(name_space); assert(name_space.length() > 0); create_dir(handle, prefix + name_space, "zk_server_list"); std::string path = normalize_path(prefix + name_space) + serveridentifier; int stat = create_ephemeral_node(handle, path, ""); if (stat == ZNODEEXISTS) { std::cerr << "Server " << serveridentifier << " already exists!" 
<< std::endl; } if (stat != ZOK) assert(false); } void server_list::leave(std::string name_space) { boost::algorithm::trim(name_space); assert(name_space.length() > 0); std::string path = normalize_path(prefix + name_space) + serveridentifier; delete_node(handle, path, "zk_server_list leave"); // also try to delete its parents if they become empty delete_dir(handle, prefix + name_space, "zk_server_list leave cleanup"); if (prefix != "/") delete_dir(handle, prefix.substr(0, prefix.length() - 1), "zk_server_list leave cleanup"); } // ------------- watch implementation --------------- std::vector server_list::watch_changes(std::string name_space) { boost::algorithm::trim(name_space); assert(name_space.length() > 0); struct String_vector children; children.count = 0; children.data = NULL; std::vector ret; std::string path = prefix + name_space; watchlock.lock(); if (watches.count(path)) { watchlock.unlock(); return get_all_servers(name_space); } watches.insert(path); int stat = zoo_get_children(handle, path.c_str(), 1, &children); watchlock.unlock(); // if there are no children quit if (stat == ZNONODE) return ret; print_stat(stat, "zk_server_list watch_changes", path); ret = String_vector_to_vector(&children); free_String_vector(&children); return ret; } void server_list::stop_watching(std::string name_space) { boost::algorithm::trim(name_space); assert(name_space.length() > 0); std::string path = prefix + name_space; watchlock.lock(); watches.erase(path); watchlock.unlock(); } void server_list::set_callback(boost::function server) > fn) { watchlock.lock(); callback = fn; watchlock.unlock(); } void server_list::issue_callback(std::string path) { watchlock.lock(); // search for the path in the watch set bool found = watches.count(path); if (found) { struct String_vector children; children.count = 0; children.data = NULL; std::vector ret; // reissue the watch int stat = zoo_get_children(handle, path.c_str(), 1, &children); print_stat(stat, "zk serverlist 
issue_callback", path); ret = String_vector_to_vector(&children); free_String_vector(&children); // if a callback is registered if (callback != NULL) { callback(this, path, ret); } } watchlock.unlock(); } void server_list::watcher(zhandle_t *zh, int type, int state, const char *path, void *watcherCtx) { server_list* slist = reinterpret_cast(watcherCtx); if (type == ZOO_CHILD_EVENT) { std::string strpath = path; slist->issue_callback(path); } } } // namespace zookeeper } // namespace graphlab ================================================ FILE: src/graphlab/zookeeper/server_list.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_ZOOKEEPER_SERVER_LIST_HPP #define GRAPHLAB_ZOOKEEPER_SERVER_LIST_HPP #include #include #include #include #include extern "C" { #include } namespace graphlab{ namespace zookeeper { /** * A simple zookeeper service to maintain a list of servers. * The service provides the ability to watch for new servers leaving * and joining the service through the use of callbacks. */ class server_list { public: /// Joins a zookeeper cluster. /// Zookeeper nodes will be created in the prefix "prefix". 
/// The current machine will be identified as "serveridentifier" server_list(std::vector zkhosts, std::string prefix, std::string serveridentifier); /// destructor ~server_list(); /// Joins a namespace void join(std::string name_space); /// Leaves a namespace. Note that if this machine crashes, or if the /// server list is destroyed, it will automatically leave the namespace. void leave(std::string name_space); /// gets a list of all servers in a namespace std::vector get_all_servers(std::string name_space); /// Watches for changes to a namespace while returning the current contents /// When changes occur, the callback is called. std::vector watch_changes(std::string name_space); /// Removes the watch callback. void stop_watching(std::string name_space); /** Adds a callback which will be triggered when any namespace in the prefix * changes. The callback arguments will be the server_list object, the * namespace which changed, and the new list of servers in the name space. * Calling this function will a NULL argument deletes * the callback. Note that the callback may be triggered in a different thread. */ void set_callback(boost::function server) > fn); private: std::string prefix, serveridentifier; zhandle_t* handle; recursive_mutex watchlock; std::set watches; boost::function)> callback; void issue_callback(std::string path); static void watcher(zhandle_t *zh, int type, int state, const char *path, void *watcherCtx); }; } // namespace zookeeper } // namespace graphlab #endif ================================================ FILE: src/graphlab/zookeeper/zookeeper_common.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include extern "C" { #include } namespace graphlab{ namespace zookeeper { // frees a zookeeper String_vector void free_String_vector(struct String_vector* strings) { if (strings->data) { for (size_t i = 0;i < (size_t)(strings->count); ++i) { free(strings->data[i]); } free(strings->data); strings->data = NULL; strings->count = 0; } } // convert a zookeeper String_vector to a c++ vector std::vector String_vector_to_vector( const struct String_vector* strings) { std::vector ret; for (size_t i = 0;i < (size_t)(strings->count); ++i) { ret.push_back(strings->data[i]); } return ret; } // print a few zookeeper error status void print_stat(int stat, const std::string& prefix, const std::string& path) { if (stat == ZNONODE) { std::cerr << prefix << ": Node missing" << path << std::endl; } else if (stat == ZNOAUTH) { std::cerr << prefix << ": No permission to list children of node " << path << std::endl; } else if (stat == ZNODEEXISTS) { std::cerr << prefix << ": Node " << path << " already exists." << std::endl; } else if (stat == ZNOTEMPTY) { std::cerr << prefix << ": Node " << path << " not empty." 
<< std::endl; } else if (stat != ZOK) { std::cerr << prefix << ": Unexpected error " << stat << " on path " << path << std::endl; } } // adds a trailing / to the path name if there is not one already std::string normalize_path(std::string prefix) { boost::algorithm::trim(prefix); if (prefix.length() == 0) return "/"; else if (prefix[prefix.length() - 1] != '/') return prefix + "/"; else return prefix; } int create_dir(zhandle_t* handle, const std::string& name, const std::string& stat_message) { int stat = zoo_create(handle, name.c_str(), NULL, -1, &ZOO_OPEN_ACL_UNSAFE, 0, NULL, 0); // we are ok with ZNODEEXISTS // if (stat == ZOK || stat == ZNODEEXISTS) return stat; if (stat != ZOK) print_stat(stat, stat_message + " create_dir", name); return stat; } int delete_dir(zhandle_t* handle, const std::string& name, const std::string& stat_message) { int stat = zoo_delete(handle, name.c_str(), -1); // we are ok if the node is not empty in which case // there are still machines in the name space // if (stat == ZOK || stat == ZNOTEMPTY) return stat; if (stat != ZOK) print_stat(stat, stat_message + " delete_dir", name); return stat; } int create_ephemeral_node(zhandle_t* handle, const std::string& path, const std::string& value, const std::string& stat_message) { int stat = zoo_create(handle, path.c_str(), value.c_str(), value.length(), &ZOO_OPEN_ACL_UNSAFE, ZOO_EPHEMERAL, NULL, 0); // if (stat == ZOK) return stat; if (stat != ZOK) print_stat(stat, stat_message + " create_ephemeral_node", path); return stat; } int delete_node(zhandle_t* handle, const std::string& path, const std::string& stat_message) { int stat = zoo_delete(handle, path.c_str(), -1); // if (stat == ZOK) return stat; if (stat != ZOK) print_stat(stat, stat_message + " delete_node", path); return stat; } std::string get_sequence_node_path(const std::string& path, const int version) { char versionstring[16]; sprintf(versionstring, "%010d", version); std::string actualpath = path + versionstring; return 
actualpath; } int delete_sequence_node(zhandle_t* handle, const std::string& path, const int version, const std::string& stat_message) { std::string actualpath = get_sequence_node_path(path, version); int stat = zoo_delete(handle, actualpath.c_str(), -1); // if (stat == ZOK) return stat; if (stat != ZOK) print_stat(stat, stat_message + " delete_sequence_node", actualpath); return stat; } std::pair create_ephemeral_sequence_node(zhandle_t* handle, const std::string& path, const std::string& value, const std::string& stat_message) { // make sure we always have enough room for the version number assert(path.length() + 10 < 1024); char retpathbuffer[1024]; int stat = zoo_create(handle, path.c_str(), value.c_str(), value.length(), &ZOO_OPEN_ACL_UNSAFE, ZOO_EPHEMERAL | ZOO_SEQUENCE, retpathbuffer, 1024); // if (stat == ZOK) return stat; if (stat != ZOK) print_stat(stat, stat_message + " create_ephemeral_sequence_node", path); int retlen = strlen(retpathbuffer); assert(retlen > 10); int version = atoi(retpathbuffer + (retlen - 10)); return std::pair(stat, version); } std::pair get_node_value(zhandle_t* handle, const std::string& node, const std::string& stat_message) { char buffer[1024]; int length = 1024; int stat = zoo_get(handle, node.c_str(), 0, buffer, &length, NULL); if (stat != ZOK) print_stat(stat, stat_message + " get_node_value", node); if (stat != ZOK) return std::pair(false, ""); // we are good here if (length <= 1024) { // ok. it fit inside the buffer // we can return if (length < 0) return std::pair(true, ""); else return std::pair(true, std::string(buffer, length)); } else { while(1) { // buffer not long enough. The length parameter constains the actual length // try again. 
keep looping until we succeed char* newbuffer = new char[length]; int stat = zoo_get(handle, node.c_str(), 0, newbuffer, &length, NULL); if (stat != ZOK) print_stat(stat, stat_message + " get_node_value", node); std::string retval(newbuffer, length); delete newbuffer; if (stat != ZOK) print_stat(stat, stat_message + " get_node_value", node); if (stat != ZOK) return std::pair(false, ""); if (length < 0) return std::pair(true, ""); else return std::pair(true, retval); } } } } // graphlab } // zookeeper ================================================ FILE: src/graphlab/zookeeper/zookeeper_common.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_ZOOKEEPER_COMMON_HPP #define GRAPHLAB_ZOOKEEPER_COMMON_HPP #include #include extern "C" { #include } namespace graphlab{ namespace zookeeper { /// frees a zookeeper String_vector void free_String_vector(struct String_vector* strings); /// convert a zookeeper String_vector to a c++ vector std::vector String_vector_to_vector( const struct String_vector* strings); /// print a few zookeeper error status void print_stat(int stat, const std::string& prefix, const std::string& path); /// adds a trailing / to the path name if there is not one already std::string normalize_path(std::string prefix); /// Creates a zookeeper directory int create_dir(zhandle_t* handle, const std::string& path, const std::string& stat_message = ""); /// Deletes a zookeeper directory int delete_dir(zhandle_t* handle, const std::string& path, const std::string& stat_message = ""); /// Creates a zookeeper ephemeral node int create_ephemeral_node(zhandle_t* handle, const std::string& path, const std::string& value, const std::string& stat_message = ""); /// Deletes a zookeeper ephemeral node int delete_node(zhandle_t* handle, const std::string& path, const std::string& stat_message = ""); /// Deletes a zookeeper sequence node int delete_sequence_node(zhandle_t* handle, const std::string& path, const int version, const std::string& stat_message = ""); /// Gets the effective node name for a sequence node of a particular sequence number std::string get_sequence_node_path(const std::string& path, const int version); /// Creates a zookeeper ephemeral sequence nodea /// Returns a pair of (status, version) std::pair create_ephemeral_sequence_node(zhandle_t* handle, const std::string& path, const std::string& value, const std::string& stat_message = ""); /// Gets the value in a node. 
output is a pair of (success, value) std::pair get_node_value(zhandle_t* handle, const std::string& node, const std::string& stat_message = ""); } // graphlab } // zookeeper #endif ================================================ FILE: src/graphlab.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #ifndef GRAPHLAB_MASTER_INCLUDES #define GRAPHLAB_MASTER_INCLUDES // #include #include #include #include #include #include #include #include #include #include #include #include #endif ================================================ FILE: tests/CMakeLists.txt ================================================ project(tests) subdirs(data) ADD_CXXTEST(random_test.cxx) # move into toolkit #ADD_CXXTEST(factor_test.cxx) ADD_CXXTEST(small_map_test.cxx) ADD_CXXTEST(small_set_test.cxx) ADD_CXXTEST(dense_bitset_test.cxx) ADD_CXXTEST(serializetests.cxx) ADD_CXXTEST(thread_tools.cxx) ADD_CXXTEST(test_lock_free_pool.cxx) ADD_CXXTEST(lock_free_pushback.cxx) ADD_CXXTEST(union_find_test.cxx) ADD_CXXTEST(empty_test.cxx) # ADD_CXXTEST(scheduler_test.cxx) ADD_CXXTEST(csr_storage_test.cxx) ADD_CXXTEST(local_graph_test.cxx) add_graphlab_executable(distributed_graph_test distributed_graph_test.cpp) add_graphlab_executable(distributed_ingress_test distributed_ingress_test.cpp) 
add_graphlab_executable(cuckootest cuckootest.cpp) add_graphlab_executable(dc_consensus_test dc_consensus_test.cpp) add_graphlab_executable(distributed_chandy_misra_test distributed_chandy_misra_test.cpp) add_graphlab_executable(dc_fiber_consensus_test dc_fiber_consensus_test.cpp) add_graphlab_executable(dc_test_sequentialization dc_test_sequentialization.cpp) add_graphlab_executable(hdfs_test hdfs_test.cpp) add_graphlab_executable(test_parsers test_parsers.cpp) add_graphlab_executable(synchronous_engine_test synchronous_engine_test.cpp) add_graphlab_executable(async_consistent_test async_consistent_test.cpp) add_graphlab_executable(sfinae_function_test sfinae_function_test.cpp) add_test(synchronous_engine_test synchronous_engine_test) add_test(async_consistent_test async_consistent_test) # copyfile(runtests.sh) add_graphlab_executable(mini_web_server mini_web_server.cpp) add_graphlab_executable(test_vertex_set test_vertex_set.cpp) add_test(test_vertex_set test_vertex_set) add_graphlab_executable(arbitrary_signal_test arbitrary_signal_test.cpp) add_graphlab_executable(sort_test sort_test.cpp) add_graphlab_executable(hopscotch_test hopscotch_test.cpp) add_graphlab_executable(fiber_test fiber_test.cpp) add_graphlab_executable(fibo_fiber_test fibo_fiber_test.cpp) ================================================ FILE: tests/arbitrary_signal_test.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. 
See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include // #include #include typedef graphlab::distributed_graph graph_type; class test_uf: public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { if (vertex.id() < 99) context.signal_vid(vertex.id() + 1); } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; // end of count neighbors typedef graphlab::async_consistent_engine agg_engine_type; //typedef graphlab::synchronous_engine agg_engine_type; int main(int argc, char** argv) { global_logger().set_log_level(LOG_WARNING); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::dc_init_param rpc_parameters; graphlab::init_param_from_mpi(rpc_parameters); graphlab::distributed_control dc(rpc_parameters); graphlab::command_line_options clopts("Test code."); clopts.set_scheduler_type("queued_fifo"); std::cout << "Creating a powerlaw graph" << std::endl; graph_type graph(dc, clopts); graph.load_synthetic_powerlaw(100); typedef agg_engine_type engine_type; engine_type engine(dc, graph, clopts); engine.signal(0); engine.start(); ASSERT_EQ(engine.num_updates(), 100); graphlab::mpi_tools::finalize(); } // end of main ================================================ FILE: tests/async_consistent_test.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include // #include #include typedef graphlab::distributed_graph graph_type; class count_in_neighbors : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::IN_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return 1; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { ASSERT_EQ( total, int(vertex.num_in_edges()) ); } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; // end of count neighbors void test_in_neighbors(graphlab::distributed_control& dc, graphlab::command_line_options& clopts, graph_type& graph) { std::cout << "Constructing an engine for in neighbors" << std::endl; typedef graphlab::async_consistent_engine engine_type; engine_type engine(dc, graph, clopts); std::cout << "Scheduling all vertices to count their neighbors" << std::endl; engine.signal_all(); std::cout << "Running!" 
<< std::endl; engine.start(); std::cout << "Finished" << std::endl; } class count_out_neighbors : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::OUT_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return 1; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { ASSERT_EQ( total, int(vertex.num_out_edges()) ); } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; // end of count neighbors void test_out_neighbors(graphlab::distributed_control& dc, graphlab::command_line_options& clopts, graph_type& graph) { std::cout << "Constructing an engine for out neighbors" << std::endl; typedef graphlab::async_consistent_engine engine_type; engine_type engine(dc, graph, clopts); std::cout << "Scheduling all vertices to count their neighbors" << std::endl; engine.signal_all(); std::cout << "Running!" 
<< std::endl; engine.start(); std::cout << "Finished" << std::endl; } class count_all_neighbors : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: void init(icontext_type& context, const vertex_type& vertex, const message_type& msg) { ASSERT_EQ(msg, 100); } edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return 1; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { ASSERT_EQ( total, int(vertex.num_in_edges() + vertex.num_out_edges() ) ); } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; // end of count neighbors void test_all_neighbors(graphlab::distributed_control& dc, graphlab::command_line_options& clopts, graph_type& graph) { std::cout << "Constructing an engine for all neighbors" << std::endl; typedef graphlab::async_consistent_engine engine_type; engine_type engine(dc, graph, clopts); std::cout << "Scheduling all vertices to count their neighbors" << std::endl; engine.signal_all(100); std::cout << "Running!" << std::endl; engine.start(); std::cout << "Finished" << std::endl; } // Make a slow version so that the asynchronous aggregators get a change // to run. Basically, sleep a bit on apply. 
class count_all_neighbors_slow : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: void init(icontext_type& context, const vertex_type& vertex, const message_type& msg) { ASSERT_EQ(msg, 100); } edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return 1; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { graphlab::timer::sleep_ms(100); ASSERT_EQ( total, int(vertex.num_in_edges() + vertex.num_out_edges() ) ); } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; // end of count neighbors typedef graphlab::async_consistent_engine agg_engine_type; size_t agg_map(agg_engine_type::icontext_type& context, const agg_engine_type::vertex_type& vtx) { return 1; } void agg_finalize(agg_engine_type::icontext_type& context, size_t result) { std::cout << "Aggregator: #vertices = " << result << std::endl; } size_t agg_edge_map(agg_engine_type::icontext_type& context, const agg_engine_type::edge_type& vtx) { return 1; } void agg_edge_finalize(agg_engine_type::icontext_type& context, size_t result) { std::cout << "Aggregator: #edges= " << result << std::endl; } size_t identity_vertex_map(agg_engine_type::vertex_type vtx) { return vtx.data(); } size_t identity_edge_map(agg_engine_type::edge_type e) { return e.data(); } size_t identity_vertex_map_context(agg_engine_type::icontext_type& context, agg_engine_type::vertex_type vtx) { return vtx.data(); } size_t identity_edge_map_context(agg_engine_type::icontext_type& context, agg_engine_type::edge_type e) { return e.data(); } void set_vertex_to_one(agg_engine_type::vertex_type vtx) { vtx.data() = 1; } void set_edge_to_one(agg_engine_type::edge_type e) { e.data() = 1; } void vertex_plus_one(agg_engine_type::vertex_type vtx) { ++vtx.data(); } void 
vertex_minus_one_context(agg_engine_type::icontext_type& context, agg_engine_type::vertex_type vtx) { --vtx.data(); } void edge_plus_one(agg_engine_type::edge_type e) { ++e.data(); } void edge_minus_one_context(agg_engine_type::icontext_type& context, agg_engine_type::edge_type e) { --e.data(); } void test_aggregator(graphlab::distributed_control& dc, graphlab::command_line_options& clopts, graph_type& graph) { std::cout << "Constructing an engine for all neighbors" << std::endl; agg_engine_type engine(dc, graph, clopts); engine.add_vertex_aggregator("num_vertices_counter", agg_map, agg_finalize); engine.add_edge_aggregator("num_edges_counter", agg_edge_map, agg_edge_finalize); // reset all graph.transform_vertices(set_vertex_to_one); graph.transform_edges(set_edge_to_one); ASSERT_EQ(graph.map_reduce_vertices(identity_vertex_map), graph.num_vertices()); graph.transform_vertices(vertex_plus_one); ASSERT_EQ(graph.map_reduce_vertices(identity_vertex_map), 2 * graph.num_vertices()); engine.transform_vertices(vertex_minus_one_context); ASSERT_EQ(graph.map_reduce_vertices(identity_vertex_map), graph.num_vertices()); ASSERT_EQ(engine.map_reduce_vertices(identity_vertex_map_context), graph.num_vertices()); ASSERT_EQ(graph.map_reduce_edges(identity_edge_map), graph.num_edges()); graph.transform_edges(edge_plus_one); ASSERT_EQ(graph.map_reduce_edges(identity_edge_map), 2 * graph.num_edges()); engine.transform_edges(edge_minus_one_context); ASSERT_EQ(graph.map_reduce_edges(identity_edge_map), graph.num_edges()); ASSERT_EQ(engine.map_reduce_edges(identity_edge_map_context), graph.num_edges()); ASSERT_TRUE(engine.aggregate_now("num_vertices_counter")); ASSERT_TRUE(engine.aggregate_now("num_edges_counter")); ASSERT_TRUE(engine.aggregate_periodic("num_vertices_counter", 0.2)); ASSERT_TRUE(engine.aggregate_periodic("num_edges_counter", 0.2)); std::cout << "Scheduling all vertices to count their neighbors" << std::endl; engine.signal_all(100); std::cout << "Running!" 
<< std::endl; engine.start(); std::cout << "Finished" << std::endl; } int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::dc_init_param rpc_parameters; graphlab::init_param_from_mpi(rpc_parameters); graphlab::distributed_control dc(rpc_parameters); graphlab::command_line_options clopts("Test code."); clopts.set_scheduler_type("queued_fifo"); std::cout << "Creating a powerlaw graph" << std::endl; graph_type graph(dc, clopts); graph.load_synthetic_powerlaw(100); test_in_neighbors(dc, clopts, graph); test_out_neighbors(dc, clopts, graph); test_all_neighbors(dc, clopts, graph); test_aggregator(dc, clopts, graph); graphlab::mpi_tools::finalize(); } // end of main ================================================ FILE: tests/chandy_misra.cxx ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include using namespace graphlab; typedef graph graph_type; class ChandyMisraTest: public CxxTest::TestSuite { public: void test_cm() { graph_type g; for (size_t i = 0;i < 25; ++i) g.add_vertex(0); for (size_t i = 0;i < 25; ++i) { for (size_t j = 0;j < 25; ++j) { if ((i != j) && (rand() % 1000 <= 100)) { ASSERT_NE(i, j); g.add_edge(i, j, 0); } } } g.finalize(); chandy_misra cm(g); for (size_t i = 0;i < 100; ++i) { TS_ASSERT_EQUALS(cm.make_philosopher_hungry(i % 25), i % 25); std::vector r = cm.philosopher_stops_eating(i % 25); TS_ASSERT_EQUALS(r.size(), size_t(0)); } // test more aggressive for (size_t k = 0;k < 10; ++k) { dense_bitset locked, ready, complete; locked.resize(25); ready.resize(25); locked.clear(); ready.clear(); complete.resize(25); complete.clear(); for (size_t i = 0;i < 25; ++i) { locked.set_bit(i); vertex_id_type ret = cm.make_philosopher_hungry(i); if (ret != (vertex_id_type)(-1)) { complete.set_bit(ret); ready.set_bit(ret); } } cm.complete_consistency_check(); while(1) { if (ready.popcount() == 0 && complete.popcount() == g.num_vertices()) break; foreach(size_t i, ready) { ready.clear_bit(i); std::vector r = cm.philosopher_stops_eating(i); cm.complete_consistency_check(); foreach(vertex_id_type j, r) { TS_ASSERT(locked.get(j)); complete.set_bit(j); ready.set_bit(j); } } } cm.no_locks_consistency_check(); cm.complete_consistency_check(); } cm.no_locks_consistency_check(); cm.complete_consistency_check(); { // test very aggressive std::vector ctr(25, 10); size_t n = 25 * 10; std::queue ready; for (size_t i = 0;i < 25; ++i) { vertex_id_type ret = cm.make_philosopher_hungry(i); if (ret != vertex_id_type(-1)) ready.push(ret); } while(!ready.empty()) { size_t i = ready.front(); ready.pop(); TS_ASSERT(ctr[i] > 0); ctr[i]--; n--; std::vector r = cm.philosopher_stops_eating(i); foreach(vertex_id_type v, r) ready.push(v); if (ctr[i] > 0) { 
vertex_id_type ret = cm.make_philosopher_hungry(i); if (ret != vertex_id_type(-1)) ready.push(ret); } cm.complete_consistency_check(); } TS_ASSERT_EQUALS(n, size_t(0)); } } void test_parallel() { } }; ================================================ FILE: tests/csr_storage_test.cxx ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include class csr_storage_test : public CxxTest::TestSuite { public: typedef int valuetype; typedef size_t keytype; typedef size_t sizetype; typedef graphlab::csr_storage csr_storage; typedef graphlab::dynamic_csr_storage dcsr2_t; typedef graphlab::dynamic_csr_storage dcsr4_t; typedef graphlab::dynamic_csr_storage dcsr8_t; typedef graphlab::dynamic_csr_storage dcsr16_t; typedef graphlab::dynamic_csr_storage dcsr64_t; public: csr_storage_test() { keytype keyin_arr[] = {1, 3, 6, 9, 5, 2}; valuetype valin_arr[] = {3, 2, 1, 4, 4, 4}; _keyin.assign(keyin_arr, keyin_arr + sizeof(keyin_arr) / sizeof(keytype)); _valin.assign(valin_arr, valin_arr + sizeof(valin_arr) / sizeof(valuetype)); keytype keyout_arr[] = {1, 2, 3, 5, 6, 9}; valuetype valout_arr[] = {3, 4, 2, 4, 1, 4}; _keyout.assign(keyout_arr, keyout_arr + sizeof(keyout_arr) / sizeof(keytype)); _valout.assign(valout_arr, valout_arr + sizeof(valout_arr) / 
sizeof(valuetype)); } void test_csr_storage() { std::cout << "Test csr_storage constructor" << std::endl; csr_storage csr(get_keyin(), get_valin()); check(csr, get_keyout(), get_valout()); csr.print(std::cout); printf("+ Pass test: csr_storage constructor :)\n\n"); } void test_csr_storage2() { std::cout << "Test csr_storage wrap " << std::endl; std::vector keys(get_keyin()); std::vector values(get_valin()); std::vector permute_index; std::vector prefix; graphlab::counting_sort(keys, permute_index, &prefix); graphlab::outofplace_shuffle(values, permute_index); csr_storage csr; csr.wrap(prefix, values); check(csr, get_keyout(), get_valout()); printf("+ Pass test: csr_storage wrap :)\n\n"); } template void dynamic_csr_storage_constructor_test() { std::cout << "Test dynamic csr_storage constructor" << std::endl; csr_type csr(get_keyin(), get_valin()); check(csr, get_keyout(), get_valout()); printf("+ Pass test: dynamic_csr_storage constructor :)\n\n"); std::cout << "Test dynamic csr_storage wrap" << std::endl; csr.clear(); std::vector keys(get_keyin()); std::vector values(get_valin()); std::vector permute_index; std::vector prefix; graphlab::counting_sort(keys, permute_index, &prefix); graphlab::outofplace_shuffle(values, permute_index); csr.wrap(prefix, values); check(csr, get_keyout(), get_valout()); printf("+ Pass test: dynamic_csr_storage wrap:)\n\n"); } template void dynamic_csr_storage_insertion_test() { std::cout << "Test dynamic csr_storage insertion" << std::endl; std::vector keys(get_keyin()); std::vector values(get_valin()); csr_type csr; for (size_t i = 0; i < keys.size(); ++i) { csr.insert(keys[i], values[i]); } csr.get_values().print(std::cerr); check(csr, get_keyout(), get_valout()); csr.repack(); check(csr, get_keyout(), get_valout()); printf("+ Pass test: dynamic_csr_storage insertion:)\n\n"); } void test_dynamic_csr_storage_constructor() { dynamic_csr_storage_constructor_test(); dynamic_csr_storage_constructor_test(); 
dynamic_csr_storage_constructor_test(); dynamic_csr_storage_constructor_test(); } void test_dynamic_csr_storage_insertion() { dynamic_csr_storage_insertion_test(); dynamic_csr_storage_insertion_test(); dynamic_csr_storage_insertion_test(); dynamic_csr_storage_insertion_test(); } template void dynamic_csr_storage_range_insertion_test(size_t nkey, size_t nval) { std::cout << "Test dynamic csr_storage range insertion" << std::endl; csr_type csr; for (size_t i = 0; i < nkey; ++i) { std::vector vals(nval, i); csr.insert(i, vals.begin(), vals.end()); } // csr.print(std::cout); check_dcsr(csr, nkey, nval); csr.clear(); ASSERT_EQ(csr.num_keys(), 0); ASSERT_EQ(csr.num_values(), 0); ASSERT_EQ(csr.get_values().num_blocks(), 0); for (int i = nkey-1; i >= 0; --i) { std::vector vals(nval, i); csr.insert((keytype)i, vals.begin(), vals.end()); } csr.get_values().print(std::cout); check_dcsr(csr, nkey, nval); std::cout << "test repack..." << std::endl; csr.repack(); check_dcsr(csr, nkey, nval); printf("+ Pass test: dynamic_csr_storage range insertion:)\n\n"); } void test_dynamic_csr_storage_range_insertion() { dynamic_csr_storage_range_insertion_test(4, 4); dynamic_csr_storage_range_insertion_test(6, 9); dynamic_csr_storage_range_insertion_test(8, 3); dynamic_csr_storage_range_insertion_test(20, 64); } void test_dynamic_csr_storage_stress_insertion() { stress_insertion_test(4, 4); stress_insertion_test(6, 9); stress_insertion_test(8, 3); stress_insertion_test(982, 294); } template void stress_insertion_test(size_t nkey, size_t nval) { std::cout << "Test dynamic csr_storage stess insertion" << std::endl; // stress test single insertion csr_type csr; for (size_t j = 0; j < nval; ++j) { for (size_t i = 0; i < nkey; i+=2) { csr.insert(i, i); } } for (size_t j = 0; j < nval; ++j) { for (int i = nkey-1; i >= 0; i-=2) { csr.insert((keytype)i, i); } } check_dcsr(csr, nkey, nval); csr.clear(); // stress test range insertion for (size_t i = 0; i < nkey; i+=2) { std::vector values(nval, i); 
csr.insert(i, values.begin(), values.end()); } for (int i = nkey-1; i>=0; i-=2) { std::vector values(nval, i); csr.insert((keytype)i, values.begin(), values.end()); } check_dcsr(csr, nkey, nval); printf("+ Pass test: dynamic_csr_storage stress insertion:)\n\n"); } private: template void check(csr_type& csr, std::vector keyout, std::vector valout) { typedef typename csr_type::iterator iterator; size_t id = 0; for (size_t i = 0; i < csr.num_keys(); ++i) { iterator iter = csr.begin(i); while (iter != csr.end(i)) { ASSERT_EQ(i, keyout[id]); ASSERT_EQ(*iter, valout[id]); ++iter; ++id; } } } template void check_dcsr(csr_type& csr, size_t nkey, size_t nval) { ASSERT_EQ(csr.num_keys(), nkey); ASSERT_EQ(csr.num_values(), nkey*nval); for (size_t i = 0; i < csr.num_keys(); ++i) { typename csr_type::iterator iter = csr.begin(i); size_t size = 0; while(iter != csr.end(i)) { ASSERT_EQ(*iter, (valuetype)i); ++iter; ++size; } ASSERT_EQ(size, nval); } csr.meminfo(std::cout); } std::vector get_keyin() { return std::vector(_keyin); } std::vector get_valin() { return std::vector(_valin); } std::vector get_keyout() { return std::vector(_keyout); } std::vector get_valout() { return std::vector(_valout); } std::vector _keyin; std::vector _keyout; std::vector _valin; std::vector _valout; }; // end of test ================================================ FILE: tests/cuckootest.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. 
See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include #include #include #include void sanity_checks() { boost::unordered_map um; graphlab::cuckoo_map_pow2 cm(-1); ASSERT_TRUE(cm.begin() == cm.end()); for (size_t i = 0;i < 10000; ++i) { cm[17 * i] = i; um[17 * i] = i; } for (size_t i = 0;i < 10000; ++i) { assert(cm[17 * i] == i); assert(um[17 * i] == i); } assert(cm.size() == 10000); assert(um.size() == 10000); for (size_t i = 0;i < 10000; i+=2) { cm.erase(17*i); um.erase(17*i); } for (size_t i = 0;i < 10000; i+=2) { assert(cm.count(17*i) == i % 2); assert(um.count(17*i) == i % 2); if (cm.count(17*i)) { assert(cm.find(17*i)->second == i); } } assert(cm.size() == 5000); assert(um.size() == 5000); typedef graphlab::cuckoo_map_pow2::value_type vpair; { size_t cnt = 0; foreach(vpair &v, cm) { ASSERT_EQ(v.second, um[v.first]); ++cnt; } ASSERT_EQ(cnt, 5000); } { size_t cnt = 0; foreach(const vpair &v, cm) { ASSERT_EQ(v.second, um[v.first]); ++cnt; } ASSERT_EQ(cnt, 5000); } } void sanity_checks2() { boost::unordered_map um; graphlab::cuckoo_map cm(-1); ASSERT_TRUE(cm.begin() == cm.end()); for (size_t i = 0;i < 10000; ++i) { cm[17 * i] = i; um[17 * i] = i; } for (size_t i = 0;i < 10000; ++i) { assert(cm[17 * i] == i); assert(um[17 * i] == i); } assert(cm.size() == 10000); assert(um.size() == 10000); for (size_t i = 0;i < 10000; i+=2) { cm.erase(17*i); um.erase(17*i); } for (size_t i = 0;i < 10000; i+=2) { assert(cm.count(17*i) == i % 2); assert(um.count(17*i) == i % 2); if (cm.count(17*i)) { assert(cm.find(17*i)->second == i); } } assert(cm.size() == 5000); assert(um.size() == 5000); typedef graphlab::cuckoo_map::value_type vpair; { size_t cnt = 0; foreach(vpair &v, cm) { ASSERT_EQ(v.second, um[v.first]); ++cnt; } ASSERT_EQ(cnt, 5000); } { size_t cnt = 0; foreach(const vpair &v, 
cm) { ASSERT_EQ(v.second, um[v.first]); ++cnt; } ASSERT_EQ(cnt, 5000); } } std::string randstring(size_t len) { std::string ret; ret.reserve(len); for (size_t i = 0;i < len; ++i) { ret = ret + graphlab::random::fast_uniform('A','Z'); } return ret; } void more_interesting_data_types_check() { boost::unordered_map um; graphlab::cuckoo_map_pow2 cm(""); for (size_t i = 0;i < 10000; ++i) { std::string s = randstring(16); cm[s] = s; um[s] = s; } assert(cm.size() == 10000); assert(um.size() == 10000); typedef boost::unordered_map::value_type vpair; foreach(vpair& v, um) { ASSERT_EQ(v.second, cm[v.first]); } foreach(vpair& v, cm) { ASSERT_EQ(v.second, um[v.first]); } foreach(const vpair& v, cm) { ASSERT_EQ(v.second, um[v.first]); } // test assignment graphlab::cuckoo_map_pow2 cm2(""); cm2 = cm; foreach(vpair& v, um) { ASSERT_EQ(v.second, cm2[v.first]); } foreach(vpair& v, cm2) { ASSERT_EQ(v.second, um[v.first]); } foreach(const vpair& v, cm2) { ASSERT_EQ(v.second, um[v.first]); } std::stringstream strm; graphlab::oarchive oarc(strm); oarc << cm; strm.flush(); cm2.clear(); ASSERT_EQ(cm2.size(), 0); graphlab::iarchive iarc(strm); iarc >> cm2; ASSERT_EQ(cm2.size(), 10000); foreach(vpair& v, um) { ASSERT_EQ(v.second, cm2[v.first]); } foreach(vpair& v, cm2) { ASSERT_EQ(v.second, um[v.first]); } foreach(const vpair& v, cm2) { ASSERT_EQ(v.second, um[v.first]); } } void more_interesting_data_types_check2() { boost::unordered_map um; graphlab::cuckoo_map cm(""); for (size_t i = 0;i < 10000; ++i) { std::string s = randstring(16); cm[s] = s; um[s] = s; } assert(cm.size() == 10000); assert(um.size() == 10000); typedef boost::unordered_map::value_type vpair; foreach(vpair& v, um) { ASSERT_EQ(v.second, cm[v.first]); } foreach(vpair& v, cm) { ASSERT_EQ(v.second, um[v.first]); } foreach(const vpair& v, cm) { ASSERT_EQ(v.second, um[v.first]); } // test assignment graphlab::cuckoo_map cm2(""); cm2 = cm; foreach(vpair& v, um) { ASSERT_EQ(v.second, cm2[v.first]); } foreach(vpair& v, cm2) { 
ASSERT_EQ(v.second, um[v.first]); } foreach(const vpair& v, cm2) { ASSERT_EQ(v.second, um[v.first]); } std::stringstream strm; graphlab::oarchive oarc(strm); oarc << cm; strm.flush(); cm2.clear(); ASSERT_EQ(cm2.size(), 0); graphlab::iarchive iarc(strm); iarc >> cm2; ASSERT_EQ(cm2.size(), 10000); foreach(vpair& v, um) { ASSERT_EQ(v.second, cm2[v.first]); } foreach(vpair& v, cm2) { ASSERT_EQ(v.second, um[v.first]); } foreach(const vpair& v, cm2) { ASSERT_EQ(v.second, um[v.first]); } } void benchmark() { graphlab::timer ti; size_t NUM_ELS = 10000000; std::vector v; uint32_t u = 0; for (size_t i = 0;i < NUM_ELS; ++i) { v.push_back(u); u += 1 + rand() % 8; } std::random_shuffle(v.begin(), v.end()); graphlab::memory_info::print_usage(); { boost::unordered_map um; ti.start(); for (size_t i = 0;i < NUM_ELS; ++i) { um[v[i]] = i; } std::cout << NUM_ELS / 1000000 << "M unordered map inserts in " << ti.current_time() << " (Load factor = " << um.load_factor() << ")" << std::endl; graphlab::memory_info::print_usage(); ti.start(); for (size_t i = 0;i < 10000000; ++i) { size_t t = um[v[i]]; assert(t == i); } std::cout << "10M unordered map successful probes in " << ti.current_time() << std::endl; um.clear(); } { graphlab::cuckoo_map cm(-1, 128); //cm.reserve(102400); ti.start(); for (size_t i = 0;i < NUM_ELS; ++i) { cm[v[i]] = i; if (i % 1000000 == 0) std::cout << cm.load_factor() << std::endl; } std::cout << NUM_ELS / 1000000 << "M cuckoo map inserts in " << ti.current_time() << " (Load factor = " << cm.load_factor() << ")" << std::endl; graphlab::memory_info::print_usage(); ti.start(); for (size_t i = 0;i < 10000000; ++i) { size_t t = cm[v[i]]; assert(t == i); } std::cout << "10M cuckoo map successful probes in " << ti.current_time() << std::endl; } { graphlab::cuckoo_map_pow2 cm(-1, 128); //cm.reserve(102400); ti.start(); for (size_t i = 0;i < NUM_ELS; ++i) { cm[v[i]] = i; if (i % 1000000 == 0) std::cout << cm.load_factor() << std::endl; } std::cout << NUM_ELS / 1000000 << "M 
cuckoo map pow2 inserts in " << ti.current_time() << " (Load factor = " << cm.load_factor() << ")" << std::endl; graphlab::memory_info::print_usage(); ti.start(); for (size_t i = 0;i < 10000000; ++i) { size_t t = cm[v[i]]; assert(t == i); } std::cout << "10M cuckoo map pow2 successful probes in " << ti.current_time() << std::endl; } } void benchmark_strings() { graphlab::timer ti; size_t NUM_ELS = 1000000; std::vector v; for (size_t i = 0;i < NUM_ELS; ++i) { v.push_back(randstring(16)); } graphlab::memory_info::print_usage(); { boost::unordered_map um; ti.start(); for (size_t i = 0;i < NUM_ELS; ++i) { um[v[i]] = v[i]; } std::cout << NUM_ELS / 1000000 << "M unordered map inserts in " << ti.current_time() << " (Load factor = " << um.load_factor() << ")" << std::endl; graphlab::memory_info::print_usage(); ti.start(); for (size_t i = 0;i < 1000000; ++i) { std::string t = um[v[i]]; assert(t == v[i]); } std::cout << "1M unordered map successful probes in " << ti.current_time() << std::endl; um.clear(); } { graphlab::cuckoo_map cm("", 128); //cm.reserve(102400); ti.start(); for (size_t i = 0;i < NUM_ELS; ++i) { cm[v[i]] = v[i]; if (i % 1000000 == 0) std::cout << cm.load_factor() << std::endl; } std::cout << NUM_ELS / 1000000 << "M cuckoo map inserts in " << ti.current_time() << " (Load factor = " << cm.load_factor() << ")" << std::endl; graphlab::memory_info::print_usage(); ti.start(); for (size_t i = 0;i < 1000000; ++i) { std::string t = cm[v[i]]; assert(t == v[i]); } std::cout << "1M cuckoo map successful probes in " << ti.current_time() << std::endl; } { graphlab::cuckoo_map_pow2 cm("", 128); //cm.reserve(102400); ti.start(); for (size_t i = 0;i < NUM_ELS; ++i) { cm[v[i]] = v[i]; if (i % 1000000 == 0) std::cout << cm.load_factor() << std::endl; } std::cout << NUM_ELS / 1000000 << "M cuckoo map pow2 inserts in " << ti.current_time() << " (Load factor = " << cm.load_factor() << ")" << std::endl; graphlab::memory_info::print_usage(); ti.start(); for (size_t i = 0;i < 
1000000; ++i) { std::string t = cm[v[i]]; assert(t == v[i]); } std::cout << "1M cuckoo map pow2 successful probes in " << ti.current_time() << std::endl; } } void save_load_test() { typedef graphlab::cuckoo_map_pow2 cuckoo_map_type; cuckoo_map_type map(-1); for(uint32_t i = 0; i < 10000; ++i) map[i] = i; std::ofstream fout("tmp.txt"); graphlab::oarchive oarc(fout); std::string t = "The end."; oarc << map << t; fout.close(); std::ifstream fin("tmp.txt"); graphlab::iarchive iarc(fin); cuckoo_map_type map2(-1); std::string txt; iarc >> map2; iarc >> txt; ASSERT_EQ(txt, std::string("The end.")); for(uint32_t i = 0; i < 10000; ++i) ASSERT_EQ(map[i], i); } // end of save load test void cuckoo_set_sanity_checks() { boost::unordered_set um; graphlab::cuckoo_set_pow2 cm(-1, 2, 2); ASSERT_TRUE(cm.begin() == cm.end()); for (size_t i = 0;i < 10000; ++i) { cm.insert(17 * i); um.insert(17 * i); } for (size_t i = 0;i < 10000; ++i) { assert(cm.count(17 * i) == 1); assert(um.count(17 * i) == 1); } assert(cm.size() == 10000); assert(um.size() == 10000); for (size_t i = 0;i < 10000; i+=2) { cm.erase(17*i); um.erase(17*i); } for (size_t i = 0;i < 10000; i+=2) { assert(cm.count(17*i) == i % 2); assert(um.count(17*i) == i % 2); } assert(cm.size() == 5000); assert(um.size() == 5000); std::ofstream fout("tmp.txt"); graphlab::oarchive oarc(fout); oarc << cm; fout.close(); std::ifstream fin("tmp.txt"); graphlab::iarchive iarc(fin); graphlab::cuckoo_set_pow2 set2(-1); iarc >> set2; assert(set2.size() == 5000); } int main(int argc, char** argv) { std::cout << "Basic Sanity Checks... "; std::cout.flush(); sanity_checks(); sanity_checks2(); more_interesting_data_types_check(); more_interesting_data_types_check2(); save_load_test(); cuckoo_set_sanity_checks(); std::cout << "Done" << std::endl; // std::cout << "\n\n\nRunning Benchmarks. uint32-->uint32" << std::endl; // benchmark(); // std::cout << "\n\n\nRunning Benchmarks. 
string-->string" << std::endl; // benchmark_strings(); } ================================================ FILE: tests/data/CMakeLists.txt ================================================ project(tests) copy_files(*) ================================================ FILE: tests/data/test_adj/test.adj ================================================ 0 1 5 1 2 0 5 2 2 0 5 3 2 0 5 ================================================ FILE: tests/data/test_snap/test.snap ================================================ # Snap Comments # Blah Blah Blah 0 5 1 0 1 5 2 0 2 5 3 0 3 5 ================================================ FILE: tests/data/test_tsv/test.tsv ================================================ 0 5 1 0 1 5 2 0 2 5 3 0 3 5 ================================================ FILE: tests/dc_consensus_test.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include using namespace graphlab; class simple_engine_test { public: dc_dist_object rmi; blocking_queue queue; async_consensus cons; atomic numactive;; simple_engine_test(distributed_control &dc):rmi(dc, this), cons(dc, 4) { numactive.value = 4; dc.barrier(); } void add_task_local(size_t i) { queue.enqueue(i); if (numactive.value < 4) cons.cancel(); } void task(size_t i) { if (i < 5) std::cout << "Task " << i << std::endl; if (i > 0) { if (rmi.numprocs() == 1) { add_task_local(i - 1); } else { rmi.remote_call((procid_t)((rmi.procid() + 1) % rmi.numprocs()), &simple_engine_test::add_task_local, i - 1); } } } bool try_terminate(size_t cpuid, std::pair &job) { job.second = false; numactive.dec(); cons.begin_done_critical_section(cpuid); job = queue.try_dequeue(); if (job.second == false) { bool ret = cons.end_done_critical_section(cpuid); numactive.inc(); return ret; } else { cons.cancel_critical_section(cpuid); numactive.inc(); return false; } } void thread(size_t cpuid) { while(1) { std::pair job = queue.try_dequeue(); if (job.second == false) { bool ret = try_terminate(cpuid, job); if (ret == true) break; if (ret == false && job.second == false) continue; } task(job.first); } } void start_thread() { thread_group thrgrp; for (size_t i = 0;i < 4; ++i) { thrgrp.launch(boost::bind( &simple_engine_test::thread, this, i)); } thrgrp.join(); ASSERT_EQ(queue.size(), 0); } }; int main(int argc, char ** argv) { /** Initialization */ mpi_tools::init(argc, argv); global_logger().set_log_level(LOG_DEBUG); dc_init_param param; if (init_param_from_mpi(param) == false) { return 0; } distributed_control dc(param); simple_engine_test test(dc); test.add_task_local(1000); test.start_thread(); mpi_tools::finalize(); } ================================================ FILE: tests/dc_fiber_consensus_test.cpp 
================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include #include using namespace graphlab; #define NTHREADS 1000 class simple_engine_test { public: dc_dist_object rmi; blocking_queue queue; fiber_async_consensus cons; atomic numactive;; simple_engine_test(distributed_control &dc):rmi(dc, this), cons(dc, NTHREADS) { numactive.value = NTHREADS; dc.barrier(); } void add_task_local(size_t i) { queue.enqueue(i); if (numactive.value < NTHREADS) cons.cancel(); } void task(size_t i) { if (i < 5) std::cout << "Task " << i << std::endl; if (i > 0) { if (rmi.numprocs() == 1) { add_task_local(i - 1); } else { rmi.remote_call((procid_t)((rmi.procid() + 1) % rmi.numprocs()), &simple_engine_test::add_task_local, i - 1); } } } bool try_terminate(size_t cpuid, std::pair &job) { job.second = false; numactive.dec(); cons.begin_done_critical_section(cpuid); job = queue.try_dequeue(); if (job.second == false) { bool ret = cons.end_done_critical_section(cpuid); numactive.inc(); return ret; } else { cons.cancel_critical_section(cpuid); numactive.inc(); return false; } } void thread(size_t cpuid) { while(1) { std::pair job = queue.try_dequeue(); if (job.second == false) { bool ret = try_terminate(cpuid, job); if (ret == true) break; if (ret 
== false && job.second == false) continue; } task(job.first); } } void start_thread() { fiber_group thrgrp; for (size_t i = 0;i < NTHREADS; ++i) { thrgrp.launch(boost::bind( &simple_engine_test::thread, this, i)); } thrgrp.join(); ASSERT_EQ(queue.size(), 0); } }; int main(int argc, char ** argv) { /** Initialization */ mpi_tools::init(argc, argv); global_logger().set_log_level(LOG_DEBUG); dc_init_param param; if (init_param_from_mpi(param) == false) { return 0; } distributed_control dc(param); simple_engine_test test(dc); test.add_task_local(300); test.start_thread(); dc.barrier(); mpi_tools::finalize(); } ================================================ FILE: tests/dc_test_sequentialization.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include using namespace graphlab; class seq_test { public: dc_dist_object rmi; std::vector ctr; seq_test(distributed_control &dc):rmi(dc, this), ctr(100,0) { rmi.barrier(); } void recv(size_t idx, size_t val) { ASSERT_EQ(thread::thread_id(), idx); ASSERT_EQ(ctr[idx], val); ++ctr[idx]; } void run() { for (size_t i = 1; i < 2; ++i) { rmi.dc().set_sequentialization_key(i); for (size_t j = 0;j < 1000000; ++j) { rmi.remote_call(1, &seq_test::recv, i, j); } } } }; int main(int argc, char ** argv) { /** Initialization */ mpi_tools::init(argc, argv); global_logger().set_log_level(LOG_DEBUG); dc_init_param param; if (init_param_from_mpi(param) == false) { return 0; } distributed_control dc(param); seq_test test(dc); if (dc.procid() == 0) { test.run(); } dc.full_barrier(); mpi_tools::finalize(); } ================================================ FILE: tests/dcsc_test.cpp ================================================ #include #include #include #include #include using namespace graphlab; int main(int argc, char** argv) { dcsc_store store; std::cout << store; // basic tests store.insert(1, 2, 1); store.insert(2, 5, 2); store.insert(4, 4, 3); store.insert(4, 5, 4); store.insert(0, 1, 5); store.insert(0, 5, 6); store.insert(0, 3, 7); store.insert(3, 3, 8); store.insert(4, 3, 9); std::cout << store; std::cout << "\n\nPrinting column 0\n"; typedef dcsc_store::entry_type entry_type; foreach(const entry_type e, store.get_column(0)) { std::cout << "(" << e.row() << ", " << e.column() << ") = " << e.value() << "\n"; } std::cout << "\n\nPrinting column 5\n"; foreach(entry_type e, store.get_column(5)) { std::cout << "(" << e.row() << ", " << e.column() << ") = " << e.value() << "\n"; } std::cout << "\n\nChanging column 3 to all 1s\n"; foreach(entry_type e, store.get_column(3)) { e.value() = 1; } std::cout << store; srand(10); store.clear(); std::vector 
row, col, val; for (size_t i = 0;i < 10000; ++i) { row.push_back(rand()); col.push_back(rand()); val.push_back(rand()); } store.construct(row.begin(), row.end(), col.begin(), col.end(), val.begin(), val.end()); for (size_t i = 0;i < 10000; ++i) { assert(store.find(row[i], col[i]) == val[i]); } } ================================================ FILE: tests/dense_bitset_test.cxx ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include using namespace graphlab; class DenseBitsetTestSuite : public CxxTest::TestSuite { public: void test_densebitset(void) { dense_bitset d; d.resize(100); d.clear(); size_t probelocations[7] = {0, 10, 12, 50, 66, 81, 99}; // test setting for (size_t i= 0;i < 7; ++i) { d.set_bit(probelocations[i]); } for (size_t i = 0;i< 100; ++i) { bool inprobe=false; for (size_t j = 0;j <7; ++j) inprobe |= (probelocations[j] == i); TS_ASSERT_EQUALS(d.get(i), inprobe); } // test iteration size_t iter = (size_t)(-1); TS_ASSERT_EQUALS(d.first_bit(iter), true) for (size_t i= 0;i < 7; ++i) { TS_ASSERT_EQUALS(iter, probelocations[i]); bool ret = d.next_bit(iter); TS_ASSERT_EQUALS(ret, i < 6); } size_t ctr = 0; foreach(iter, d) { TS_ASSERT(ctr < 7); TS_ASSERT_EQUALS(iter, probelocations[ctr]); ++ctr; } std::stringstream strm; graphlab::oarchive oarc(strm); oarc << d; strm.flush(); graphlab::iarchive iarc(strm); dense_bitset d2; iarc >> d2; for (size_t i = 0;i< 100; ++i) { bool inprobe=false; for (size_t j = 0;j <7; ++j) inprobe |= (probelocations[j] == i); TS_ASSERT_EQUALS(d2.get(i), inprobe); } // testclearing for (size_t i= 0;i < 7; ++i) { d.clear_bit(probelocations[i]); } for (size_t i = 0;i< 100; ++i) { TS_ASSERT_EQUALS(d.get(i), false); } d.fill(); ASSERT_EQ(d.popcount(), d.size()); d.invert(); ASSERT_EQ(d.popcount(), 0); d.invert(); ASSERT_EQ(d.popcount(), d.size()); d2.fill(); ASSERT_EQ(d2.popcount(), d2.size()); } void test_fixeddensebitset(void) { fixed_dense_bitset<100> d; size_t probelocations[7] = {0, 10, 12, 50, 66, 81, 99}; // test setting for (size_t i= 0;i < 7; ++i) { d.set_bit(probelocations[i]); } for (size_t i = 0;i< 100; ++i) { bool inprobe=false; for (size_t j = 0;j <7; ++j) inprobe |= (probelocations[j] == i); TS_ASSERT_EQUALS(d.get(i), inprobe); } // test iteration size_t iter = (size_t)(-1); TS_ASSERT_EQUALS(d.first_bit(iter), true) for (size_t i= 0;i < 7; ++i) 
{ TS_ASSERT_EQUALS(iter, probelocations[i]); bool ret = d.next_bit(iter); TS_ASSERT_EQUALS(ret, i < 6); } size_t ctr = 0; foreach(iter, d) { TS_ASSERT(ctr < 7); TS_ASSERT_EQUALS(iter, probelocations[ctr]); ++ctr; } std::stringstream strm; graphlab::oarchive oarc(strm); oarc << d; strm.flush(); graphlab::iarchive iarc(strm); fixed_dense_bitset<100> d2; iarc >> d2; for (size_t i = 0;i< 100; ++i) { bool inprobe=false; for (size_t j = 0;j <7; ++j) inprobe |= (probelocations[j] == i); TS_ASSERT_EQUALS(d2.get(i), inprobe); } // testclearing for (size_t i= 0;i < 7; ++i) { d.clear_bit(probelocations[i]); } for (size_t i = 0;i< 100; ++i) { TS_ASSERT_EQUALS(d.get(i), false); } d.fill(); ASSERT_EQ(d.popcount(), d.size()); d2.fill(); ASSERT_EQ(d2.popcount(), d2.size()); } }; ================================================ FILE: tests/dht_performance_test.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include using namespace graphlab; std::string randstring(size_t len) { std::string str; str.resize(len); const char *charset="ab"; size_t charsetlen = 64; for (size_t i = 0;i < len; ++i) { str[i] = charset[rand() % charsetlen]; } return str; } int main(int argc, char ** argv) { //mpi_tools::init(argc, argv); global_logger().set_log_level(LOG_INFO); dc_init_param param; mpi_tools::init(argc, argv); if (!init_param_from_mpi(param)) { return 0; } global_logger().set_log_level(LOG_DEBUG); distributed_control dc(param); std::cout << "I am machine id " << dc.procid() << " in " << dc.numprocs() << " machines"< testdht(dc); std::vector > data; const size_t NUMSTRINGS = 10000; const size_t strlen[4] = {16, 128, 1024, 10240}; // fill rate for (size_t l = 0; l < 4; ++l) { timer ti; ti.start(); if (dc.procid() == 0) { std::cout << "String Length = " << strlen[l] << std::endl; data.clear(); for (size_t i = 0;i < NUMSTRINGS; ++i) { data.push_back(std::make_pair(randstring(8), randstring(strlen[l]))); } std::cout << "10k random strings generated" << std::endl; std::cout << "Starting set" << std::endl; for (size_t i = 0;i < NUMSTRINGS; ++i) { testdht.set(data[i].first, data[i].second); if (i % 100 == 0) { std::cout << "."; std::cout.flush(); } } std::cout << "10k insertions in " << ti.current_time() << std::endl; } dc.full_barrier(); if (dc.procid() == 0) { std::cout << "--> Time to Insertion Barrier " << ti.current_time() << std::endl; } // get rate if (dc.procid() == 0) { std::cout << "Starting get" << std::endl; timer ti; ti.start(); for (size_t i = 0;i < NUMSTRINGS; ++i) { std::pair ret = testdht.get(data[i].first); assert(ret.first); if (i % 100 == 0) { std::cout << "."; std::cout.flush(); } } std::cout << "10k reads in " << ti.current_time() << std::endl; } testdht.clear(); } dc.barrier(); testdht.print_stats(); 
mpi_tools::finalize(); } ================================================ FILE: tests/distributed_chandy_misra_test.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include #include #include #include #define INITIAL_NLOCKS_TO_ACQUIRE 1000 graphlab::mutex mt; graphlab::conditional cond; std::vector lockable_vertices; boost::unordered_map demand_set; boost::unordered_map current_demand_set; boost::unordered_map locked_set; size_t nlocksacquired ; size_t nlocks_to_acquire; struct vertex_data { uint32_t nupdates; double value, old_value; vertex_data(double value = 1) : nupdates(0), value(value), old_value(0) { } }; // End of vertex data SERIALIZABLE_POD(vertex_data); std::ostream& operator<<(std::ostream& out, const vertex_data& vdata) { return out << "Rank=" << vdata.value; } struct edge_data { }; // End of edge data SERIALIZABLE_POD(edge_data); typedef graphlab::distributed_graph graph_type; graphlab::distributed_chandy_misra *locks; graph_type *ggraph; graphlab::blocking_queue locked_elements; void callback(graphlab::vertex_id_type v) { //logstream(LOG_INFO) << "Locked " << ggraph->global_vid(v) << std::endl; mt.lock(); ASSERT_EQ(current_demand_set[v], 1); locked_set[v]++; nlocksacquired++; mt.unlock(); // 
graphlab::my_sleep(1); locked_elements.enqueue(v); } void thread_stuff() { std::pair deq; while(1) { deq = locked_elements.dequeue(); if (deq.second == false) break; else { locks->philosopher_stops_eating(deq.first); mt.lock(); current_demand_set[deq.first] = 0; bool getnextlock = nlocks_to_acquire > 0; if (nlocks_to_acquire > 0) { nlocks_to_acquire--; if (nlocks_to_acquire % 100 == 0) { std::cout << "Remaining: " << nlocks_to_acquire << std::endl; } } if (nlocks_to_acquire == 0 && nlocksacquired == INITIAL_NLOCKS_TO_ACQUIRE + lockable_vertices.size()) cond.signal(); mt.unlock(); if (getnextlock > 0) { graphlab::vertex_id_type toacquire = 0; while(1) { mt.lock(); toacquire = lockable_vertices[graphlab::random::rand() % lockable_vertices.size()]; if (current_demand_set[toacquire] == 0) { current_demand_set[toacquire] = 1; demand_set[toacquire]++; mt.unlock(); break; } mt.unlock(); } locks->make_philosopher_hungry(toacquire); } } } } int main(int argc, char** argv) { // global_logger().set_log_level(LOG_INFO); // global_logger().set_log_to_console(true); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::dc_init_param rpc_parameters; graphlab::init_param_from_mpi(rpc_parameters); graphlab::distributed_control dc(rpc_parameters); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("distributed chandy misra test."); std::string format = "adj"; std::string graph_dir = ""; clopts.attach_option("graph", graph_dir, "The graph file. If none is provided " "then a toy graph will be created"); clopts.add_positional("graph"); clopts.attach_option("format",format, "The graph file format: {metis, snap, tsv, adj, bin}"); size_t ring = 0; clopts.attach_option("ring", ring, "The size of the ring. " "If ring=0 then the graph file is used."); size_t randomconnect = 0; clopts.attach_option("randomconnect", randomconnect, "The size of a randomly connected network. 
" "If randomconnect=0 then the graph file is used."); if(!clopts.parse(argc, argv)) { std::cout << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } std::cout << dc.procid() << ": Starting." << std::endl; graphlab::timer timer; timer.start(); graph_type graph(dc, clopts); ggraph = &graph; if(ring > 0) { if(dc.procid() == 0) { for(size_t i = 0; i < ring; ++i) graph.add_edge(i, i + 1); graph.add_edge(ring, 0); } } else if(randomconnect > 0) { if(dc.procid() == 0) { for(size_t i = 0; i < randomconnect; ++i) { std::vector v(randomconnect, false); v[i] = true; for (size_t r = 0; r < randomconnect /2 ; ++r) { size_t t = graphlab::random::rand() % randomconnect; if (v[t] == false && t > i) { graph.add_edge(i, t); // std::cout << i << "->" << t << "\n"; v[t] = true; } } } } } else { std::vector graph_files; graphlab::fs_util::list_files_with_prefix(graph_dir, "", graph_files); for(size_t i = 0; i < graph_files.size(); ++i) { if (i % dc.numprocs() == dc.procid()) { const std::string graph_fname = graph_dir + graph_files[i]; std::cout << "Loading graph from structure file: " << graph_fname << std::endl; graph.load_format(graph_fname, format); } } } std::cout << dc.procid() << ": Enter Finalize" << std::endl; graph.finalize(); boost::unordered_set eidset1; boost::unordered_set eidset2; typedef graph_type::local_edge_type local_edge_type; typedef graph_type::local_edge_list_type local_edge_list_type; for (size_t v = 0; v < graph.num_local_vertices(); ++v) { const local_edge_list_type& in_edges = graph.l_in_edges(v); foreach(const local_edge_type& edge, in_edges) { size_t edgeid = edge.id(); ASSERT_TRUE(eidset1.find(edgeid) == eidset1.end()); eidset1.insert(edgeid); } const local_edge_list_type& out_edges = graph.l_out_edges(v); foreach(const local_edge_type& edge, out_edges) { size_t edgeid = edge.id(); ASSERT_TRUE(eidset1.find(edgeid) == eidset1.end()); ASSERT_TRUE(eidset2.find(edgeid) == eidset2.end()); eidset2.insert(edgeid); } } 
ASSERT_EQ(eidset1.size(), eidset2.size()); eidset1.clear(); eidset2.clear(); std::cout << " ===============================================================" << std::endl; std::cout << dc.procid() << ": Finished in " << timer.current_time() << std::endl; std::cout << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << (float)graph.num_replicas()/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() << "\n Edge balance ratio: " << (float)graph.num_local_edges()/graph.num_edges() << std::endl; // for (graphlab::vertex_id_type v = 0; v < graph.num_local_vertices(); ++v) { // std::cout << graph.l_get_vertex_record(v).gvid << ": " << graph.l_get_vertex_record(v).owner << ":"; // foreach(graphlab::procid_t pid, graph.l_get_vertex_record(v).get_replicas()) { // std::cout << pid << " "; // } // std::cout << "\n"; // } dc.barrier(); locks = new graphlab::distributed_chandy_misra(dc, graph, callback); nlocksacquired = 0; nlocks_to_acquire = INITIAL_NLOCKS_TO_ACQUIRE; dc.full_barrier(); for (graphlab::vertex_id_type v = 0; v < graph.num_local_vertices(); ++v) { if (graph.l_get_vertex_record(v).owner == dc.procid()) { demand_set[v] = 1; current_demand_set[v] = 1; lockable_vertices.push_back(v); } } dc.full_barrier(); graphlab::thread_group thrs; for (size_t i = 0;i < 10; ++i) { thrs.launch(thread_stuff); } for (graphlab::vertex_id_type v = 0; v < graph.num_local_vertices(); ++v) { if (graph.l_get_vertex_record(v).owner == dc.procid()) { //std::cout << dc.procid() << ": Lock Req for " << 
graph.l_get_vertex_record(v).gvid << std::endl; locks->make_philosopher_hungry(v); } } mt.lock(); while (nlocksacquired != INITIAL_NLOCKS_TO_ACQUIRE + lockable_vertices.size()) cond.wait(mt); mt.unlock(); dc.barrier(); locked_elements.stop_blocking(); thrs.join(); std::cout << INITIAL_NLOCKS_TO_ACQUIRE + lockable_vertices.size() << " Locks to acquire\n"; std::cout << nlocksacquired << " Locks Acquired in total\n"; boost::unordered_map::const_iterator iter = demand_set.begin(); bool bad = (nlocksacquired != INITIAL_NLOCKS_TO_ACQUIRE + lockable_vertices.size()); while (iter != demand_set.end()) { if(locked_set[iter->first] != iter->second) { std::cout << graph.l_get_vertex_record(iter->first).gvid << " mismatch: " << locked_set[iter->first] << ", " << iter->second << "\n"; bad = true; } ++iter; } if (bad) { locks->print_out(); } dc.barrier(); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main ================================================ FILE: tests/distributed_graph_test.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ // standard C++ headers #include #include #include template std::vector operator+=(std::vector& v1, const std::vector& v2) { for (size_t i = 0; i < v2.size(); ++i) v1.push_back(v2[i]); return v1; } #include #include #include #include #include graphlab::distributed_control* dc; template class map_reduce; namespace tests{ class distributed_graph_test { public: struct vertex_data: public graphlab::IS_POD_TYPE { size_t value; vertex_data() : value(0) { } vertex_data(size_t n) : value(n) { } bool operator==(const vertex_data& other) const { return value == other.value; } }; struct edge_data: public graphlab::IS_POD_TYPE { int from; int to; edge_data (int f = 0, int t = 0) : from(f), to(t) {} bool operator==(const edge_data& other) const { return ((from == other.from) && (to == other.to)); } }; /** * Test adding vertex. */ void test_add_vertex() { graphlab::distributed_graph g(*dc); test_add_vertex_impl(g, 100); test_add_vertex_impl(g, 1000); test_add_vertex_impl(g, 10000); dc->cout() << "\n+ Pass test: graph add vertex. :) \n"; } /** * Test adding edges */ void test_add_edge() { graphlab::distributed_graph g(*dc); test_add_edge_impl(g, 10); test_add_edge_impl(g, 1000); test_add_edge_impl(g, 10000); dc->cout() << "\n+ Pass test: graph add edge. :) \n"; } /** * Test adding edges */ void test_dynamic_add_edge() { graphlab::distributed_graph g(*dc); if (g.is_dynamic()) { test_add_edge_impl(g, 10, true); test_add_edge_impl(g, 1000, true); test_add_edge_impl(g, 10000, true); dc->cout() << "\n+ Pass test: graph dynamically add edge. :) \n"; } else { dc->cout() << "\n- Graph does not support dynamic. 
Please compile with -DUSE_DYNAMIC_GRAPH \n"; } } /** * Test save load */ void test_save_load() { graphlab::distributed_graph g(*dc); for (size_t i = 0; i < 10; ++i) { g.add_edge(i, (i+1), edge_data(i, i+1)); } g.finalize(); test_save_load_impl(g); if (g.is_dynamic()) { for (size_t i = 0; i < 10; ++i) { g.add_edge(i+1, (i), edge_data(i+1, i)); } g.finalize(); test_save_load_impl(g); } dc->cout() << "\n+ Pass test: graph save load binary. :) \n"; } private: template void test_add_vertex_impl(Graph& g, size_t nverts) { g.clear(); ASSERT_EQ(g.num_vertices(), 0); for (size_t i = 0; i < nverts; ++i) { g.add_vertex(i, vertex_data(i)); } ASSERT_EQ(g.num_vertices(), 0); g.finalize(); for (size_t i = 0; i < g.num_local_vertices(); ++i) { ASSERT_EQ(g.l_vertex(i).data().value, g.global_vid(i)); } ASSERT_EQ(g.num_vertices(), nverts); // Test dynamic graph capability if (g.is_dynamic()) { // dynamic graph should support adding vertices after finalization // add more vertices and override existing vertex values for (size_t i = 0; i < 2*nverts; ++i) { g.add_vertex(i, vertex_data(i*2)); } g.finalize(); ASSERT_EQ(g.num_vertices(), 2*nverts); for (size_t i = 0; i < g.num_local_vertices(); ++i) { ASSERT_EQ(g.l_vertex(i).data().value, g.global_vid(i) * 2); } } } template void test_add_edge_impl(Graph& g, size_t nedges, bool use_dynamic = false) { typedef typename Graph::vertex_id_type vertex_id_type; srand(0); g.clear(); ASSERT_EQ(g.num_edges(), 0); boost::unordered_map > out_edges; boost::unordered_map > in_edges; boost::unordered_set< std::pair > all_edges; while (all_edges.size() < nedges) { vertex_id_type src = rand() % (int)(3*sqrt(nedges)); vertex_id_type dst = rand() % (int)(3*sqrt(nedges)); if (src == dst) continue; std::pair pair(src, dst); if (!all_edges.count(pair)) { all_edges.insert(pair); if (!out_edges.count(src)) { out_edges[src] = std::vector(); } if (!in_edges.count(dst)) { in_edges[dst] = std::vector(); } in_edges[dst].push_back(src); out_edges[src].push_back(dst); } 
} typedef typename boost::unordered_set< std::pair >::value_type pair_type; int count = 0; foreach (const pair_type& p, all_edges) { if (count++ % dc->numprocs() == dc->procid()) { g.add_edge(p.first, p.second, edge_data(p.first, p.second)); } if (use_dynamic && count % (nedges/5) == 0) { g.finalize(); } } if (!use_dynamic) ASSERT_EQ(g.num_edges(), 0); g.finalize(); check_adjacency(g, in_edges, out_edges, all_edges.size()); check_edge_data(g); check_vertex_info(g); } template void test_save_load_impl(Graph& g) { typedef typename Graph::local_edge_type local_edge_type; using namespace boost::filesystem; path ph = unique_path(); if (create_directory(ph)) { path prefix = ph; prefix /= "test"; dc->cout() << "Save to path: " << prefix.string() << std::endl; g.save_binary(prefix.string()); Graph g2(*dc); g2.load_binary(prefix.string()); ASSERT_EQ(g.num_vertices(), g2.num_vertices()); ASSERT_EQ(g.num_edges(), g2.num_edges()); for (size_t i = 0; i < g.num_local_vertices(); ++i) { // check vertex records ASSERT_TRUE(g.l_get_vertex_record(i) == g2.l_get_vertex_record(i)); // check vertex data ASSERT_TRUE(g.l_vertex(i).data() == g2.l_vertex(i).data()); // check local in edges ASSERT_EQ(g.l_in_edges(i).size(), g2.l_in_edges(i).size()); size_t in_edge_size = g.l_in_edges(i).size(); for (size_t j = 0; j < in_edge_size; ++j) { ASSERT_EQ(g.l_in_edges(i)[j].source().lvid, g2.l_in_edges(i)[j].source().lvid); ASSERT_EQ(g.l_in_edges(i)[j].target().lvid, g2.l_in_edges(i)[j].target().lvid); ASSERT_TRUE(g.l_in_edges(i)[j].data() == g2.l_in_edges(i)[j].data()); } // check local out edges ASSERT_EQ(g.l_out_edges(i).size(), g2.l_out_edges(i).size()); size_t out_edge_size = g.l_out_edges(i).size(); for (size_t j = 0; j < out_edge_size; ++j) { ASSERT_EQ(g.l_out_edges(i)[j].source().lvid, g2.l_out_edges(i)[j].source().lvid); ASSERT_EQ(g.l_out_edges(i)[j].target().lvid, g2.l_out_edges(i)[j].target().lvid); ASSERT_TRUE(g.l_out_edges(i)[j].data() == g2.l_out_edges(i)[j].data()); } } dc->cout() << 
"Remove path: " << ph.string()<< std::endl; remove_all(ph); } else { dc->cout() << "Unable to create tmp directory:" << ph.string() << std::endl; } } template void check_edge_data(Graph& g) { typedef typename Graph::local_edge_list_type local_edge_list_type; typedef typename Graph::local_edge_type local_edge_type; typedef typename Graph::vertex_type vertex_type; typedef typename Graph::vertex_id_type vertex_id_type; for (size_t i = 0; i < g.num_local_vertices(); ++i) { const local_edge_list_type& in_edges = g.l_in_edges(i); foreach (const local_edge_type& e, in_edges) { ASSERT_EQ(e.data().from, g.global_vid(e.source().id())); ASSERT_EQ(e.data().to, g.global_vid(e.target().id())); } const local_edge_list_type& out_edges = g.l_out_edges(i); foreach (const local_edge_type& e, out_edges) { ASSERT_EQ(e.data().from, g.global_vid(e.source().id())); ASSERT_EQ(e.data().to, g.global_vid(e.target().id())); } } } /** * Helper function to check the in/out edges of the graph. */ template void check_adjacency(Graph& g, boost::unordered_map >& in_edges, boost::unordered_map >& out_edges, size_t nedges) { typedef typename Graph::local_edge_list_type local_edge_list_type; typedef typename Graph::local_edge_type local_edge_type; typedef typename Graph::vertex_type vertex_type; typedef typename Graph::vertex_id_type vertex_id_type; // check total edge size ASSERT_EQ(g.num_edges(), nedges); size_t sum_local_edges = g.num_local_edges(); dc->all_reduce(sum_local_edges); ASSERT_EQ(g.num_edges(), sum_local_edges); // check local edge size size_t local_in_edge_size = 0; size_t local_out_edge_size = 0; for (size_t i = 0; i < g.num_local_vertices(); ++i) { local_in_edge_size += g.l_in_edges(i).size(); local_out_edge_size += g.l_out_edges(i).size(); } ASSERT_EQ(local_in_edge_size, g.num_local_edges()); ASSERT_EQ(local_out_edge_size, g.num_local_edges()); // check adjacency list typedef map_reduce< vertex_id_type, std::vector > dist_adj_type; dist_adj_type local_out_adj, local_in_adj; for 
(size_t i = 0; i < g.num_local_vertices(); ++i) { std::vector outids, inids; vertex_id_type gvid = g.global_vid(i); const local_edge_list_type& ls_out = g.l_out_edges(i); const local_edge_list_type& ls_in = g.l_in_edges(i); foreach (const local_edge_type& e, ls_out) { ASSERT_EQ(e.source().id(), i); outids.push_back(g.global_vid(e.target().id())); } foreach (const local_edge_type& e, ls_in) { ASSERT_EQ(e.target().id(), i); inids.push_back(g.global_vid(e.source().id())); } local_out_adj.data[gvid] = outids; local_in_adj.data[gvid] = inids; } dc->all_reduce(local_out_adj); dc->all_reduce(local_in_adj); typedef typename boost::unordered_map >::const_iterator iter_type; // check out adjacency for (iter_type it = out_edges.begin(); it != out_edges.end(); ++it) { vertex_id_type id = it->first; std::vector expected = it->second; std::vector actual = local_out_adj.data[id]; std::sort(actual.begin(), actual.end()); std::sort(expected.begin(), expected.end()); ASSERT_EQ(actual.size(), expected.size()); if (g.vid2lvid.count(id)) ASSERT_EQ(g.num_out_edges(id), expected.size()); for (size_t i = 0; i < actual.size(); ++i) { ASSERT_EQ(actual[i], expected[i]); } } // check in adjacency for (iter_type it = in_edges.begin(); it != in_edges.end(); ++it) { vertex_id_type id = it->first; std::vector expected = it->second; std::vector actual = local_in_adj.data[id]; std::sort(actual.begin(), actual.end()); std::sort(expected.begin(), expected.end()); ASSERT_EQ(actual.size(), expected.size()); if (g.vid2lvid.count(id)) ASSERT_EQ(g.num_in_edges(id), expected.size()); for (size_t i = 0; i < actual.size(); ++i) { ASSERT_EQ(actual[i], expected[i]); } } } template struct vertex_info { typename Graph::vertex_id_type vid; typename Graph::vertex_data_type data; typename Graph::mirror_type mirrors; graphlab::procid_t master; size_t num_in_edges, num_out_edges; bool operator==(const vertex_info& other) { return ((master == other.master) && (vid == other.vid) && (data == other.data) && (mirrors == 
other.mirrors) && (num_in_edges == other.num_in_edges) && (num_out_edges == other.num_out_edges)); } void load(graphlab::iarchive& arc) { arc >> vid >> master >> mirrors >> num_in_edges >> num_out_edges >> data; } void save(graphlab::oarchive& arc) const { arc << vid << master << mirrors << num_in_edges << num_out_edges << data; } // end of save }; template void check_vertex_info(Graph& g) { typedef typename Graph::vertex_id_type vertex_id_type; typedef typename Graph::vertex_data_type vertex_data_type; typedef typename Graph::vertex_type vertex_type; typedef typename Graph::local_vertex_type local_vertex_type; typedef vertex_info vinfo_type; typedef typename boost::unordered_map vinfo_map_type; vinfo_map_type vid2info; std::vector vids; for (size_t i = 0; i < g.num_local_vertices(); ++i) { vertex_type v = g.vertex(g.global_vid(i)); local_vertex_type lv = g.l_vertex(i); ASSERT_EQ(v.local_id(), lv.id()); ASSERT_EQ(v.id(), lv.global_id()); vinfo_type info; info.vid = v.id(); info.num_in_edges = v.num_in_edges(); info.num_out_edges = v.num_out_edges(); info.data = v.data(); info.mirrors = lv.mirrors(); info.master = lv.owner(); // master should not be in the mirror set ASSERT_TRUE(info.mirrors.get(info.master) == 0); vid2info[v.id()] = info; if (lv.owned()) vids.push_back(v.id()); } // gather the vid->record map on each machine std::vector vinfo_map_gather(dc->numprocs()); vinfo_map_gather[dc->procid()] = vid2info; dc->all_gather(vinfo_map_gather); dc->all_reduce(vids); ASSERT_EQ(vids.size(), g.num_vertices()); // check the consistency of vertex_record on each machine. foreach(vertex_id_type vid, vids) { std::vector records; std::vector mirror_expected; for (size_t i = 0; i < vinfo_map_gather.size(); ++i) { if (vinfo_map_gather[i].count(vid)) { records.push_back(vinfo_map_gather[i][vid]); mirror_expected.push_back(i); } } // check vertex records are consistent across machines. 
for (size_t i = 1; i < records.size(); ++i) { ASSERT_TRUE(records[i] == records[0]); } // recevied record size == mirror size + 1 ASSERT_EQ(records.size(), records[0].mirrors.popcount()+1); for (size_t i = 0; i < mirror_expected.size(); ++i) { size_t procid = mirror_expected[i]; ASSERT_TRUE(records[0].mirrors.get(procid) || (records[0].master == procid)); } } // end for loop over all vertices } }; // end of distributed_graph_test } // namespace using namespace tests; template class map_reduce { public: boost::unordered_map data; void save(graphlab::oarchive& oarc) const { oarc << data; } void load(graphlab::iarchive& iarc) { iarc >> data; } map_reduce& operator+=(const map_reduce& other) { for (typename boost::unordered_map::const_iterator it = other.data.begin(); it != other.data.end(); ++it) { K key = it->first; V val = it->second; if (data.count(key)) { data[key] += val; } else { data[key] = val; } } return *this; } }; int main(int argc, char** argv) { graphlab::mpi_tools::init(argc, argv); dc = new graphlab::distributed_control(); // run tests distributed_graph_test testsuit; testsuit.test_add_vertex(); testsuit.test_add_edge(); testsuit.test_dynamic_add_edge(); testsuit.test_save_load(); delete(dc); graphlab::mpi_tools::finalize(); } #include ================================================ FILE: tests/distributed_ingress_test.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. 
See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ // standard C++ headers #include #include #include #include #include #include // #include #include typedef char vertex_data; typedef std::string edge_data; typedef graphlab::distributed_graph graph_type; typedef graph_type::vertex_record vertex_record; int main(int argc, char** argv) { ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); graphlab::command_line_options clopts("Distributed graph load test."); std::string graphpath; bool gzip = false; std::string prefix = ""; std::string format = "adj"; clopts.attach_option("graph", graphpath, "The graph path \n"); clopts.attach_option("prefix", prefix, "The prefix for load/save binary file\n"); clopts.attach_option("gzip", gzip, "The input is in gzip format\n"); clopts.attach_option("format", format, "format of the graph: {adj, snap}\n"); if(!clopts.parse(argc, argv)) { logstream(LOG_FATAL) << "Error in parsing command line arguments." 
<< std::endl; return EXIT_FAILURE; } graphlab::timer mytimer; mytimer.start(); // global_logger().set_log_to_console(true); graph_type graph(dc, clopts); graph.load_format(graphpath, format); // size_t heap_size_load; // size_t allocate_size_load; // MallocExtension::instance()->GetNumericProperty("generic.heap_size", &heap_size_load); // MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes", &allocate_size_load); // if (dc.procid() == 0) { // std::cout << "==========================================" << std::endl; // std::cout << "Heap Size (before finalize): " << (double)heap_size_load/(1024*1024) << "MB" << "\n"; // std::cout << "Allocated Size (before finalize): " << (double)allocate_size_load/(1024*1024) << "MB" << "\n"; // std::cout << "==========================================" << std::endl; // } double time_to_load = mytimer.current_time(); graph.finalize(); double time_all = mytimer.current_time(); std::cout << dc.procid() << ": Finished in " << mytimer.current_time() << std::endl; std::cout << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << (float)graph.num_replicas()/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() << "\n Edge balance ratio: " << (float)graph.num_local_edges()/graph.num_edges() << "\n --------------------------------------------" << std::endl; std::cout << "==========================================" << std::endl; // size_t heap_size; // size_t allocate_size; // 
MallocExtension::instance()->GetNumericProperty("generic.heap_size", &heap_size); // MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes", &allocate_size); // if (dc.procid() == 0) { // std::cout << "Heap Size: " << (double)heap_size/(1024*1024) << "MB" << "\n"; // std::cout << "Allocated Size: " << (double)allocate_size/(1024*1024) << "MB" << "\n"; // } if (dc.procid() == 0) { std::ofstream fout; std::vector keys = clopts.get_graph_args().get_option_keys(); std::string ingress_method = "random"; std::string constraint_graph = "na"; std::string bufsize = "50000"; bool usehash = false; bool userecent = false; foreach (std::string opt, keys) { if (opt == "ingress") { clopts.get_graph_args().get_option("ingress", ingress_method); } else if (opt == "bufsize") { clopts.get_graph_args().get_option("bufsize", bufsize); } else if (opt == "usehash") { clopts.get_graph_args().get_option("usehash", usehash); } else if (opt == "userecent") { clopts.get_graph_args().get_option("userecent", userecent); } else if (opt == "constrained_graph") { clopts.get_graph_args().get_option("constrained_graph", constraint_graph); } } fout.open("result.txt"); fout << "#ingress: " << ingress_method << std::endl << "#constraint: " << constraint_graph << std::endl << "#bufsize: " << bufsize << std::endl << "#usehash: " << usehash << std::endl << "#userecent: " << userecent << std::endl; fout << "Num procs: " << dc.numprocs() << std::endl; fout << "Replication factor: " << (float)graph.num_replicas()/graph.num_vertices() << std::endl; fout << "Balance factor: " << (float)graph.num_local_edges()/graph.num_edges() << std::endl; // fout << "Heap size (load): " << (double)heap_size_load/(1024*1024) << std::endl; // fout << "Allocated size (load): " << (double)allocate_size_load/(1024*1024) << std::endl; // fout << "Heap size (finalize): " << (double)heap_size/(1024*1024) << std::endl; // fout << "Allocated size (finalize): " << (double)allocate_size/(1024*1024) << 
std::endl; fout << "Runtime (load): " << time_to_load << std::endl; fout << "Runtime (total): " << time_all << std::endl; fout.close(); } // graph.get_local_graph().save_adjacency("partition_"+boost::lexical_cast(dc.procid())+".txt"); // graph.save_format("partition", "snap", false, 1); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main #include ================================================ FILE: tests/empty_test.cxx ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include using namespace graphlab; class empty_test : public CxxTest::TestSuite { public: void test_empty() { std::vector v; v.resize(100); TS_ASSERT_EQUALS(v.size(), (size_t)100); size_t c = 0; foreach(empty e, v) { e = empty(); ++c; } TS_ASSERT_EQUALS(c, (size_t)100); TS_ASSERT_EQUALS(v.end() - v.begin(), (int)100); TS_ASSERT_EQUALS(v.rend() - v.rbegin(), (int)100); std::vector v2; v2.assign(v.begin(), v.end()); TS_ASSERT_EQUALS(v2.size(), (size_t)100); v2.assign(v.rbegin(), v.rend()); TS_ASSERT_EQUALS(v2.size(), (size_t)100); v.insert(v.begin(), empty()); TS_ASSERT_EQUALS(v.size(), (size_t)101); v.insert(v.end(), empty()); TS_ASSERT_EQUALS(v.size(), (size_t)102); std::vector::const_iterator iter = v.begin(); (*iter); ++iter; TS_ASSERT_EQUALS(v.end() - iter, (int)101); --iter; TS_ASSERT_EQUALS(v.end() - iter, (int)102); iter+=10; TS_ASSERT_EQUALS(v.end() - iter, (int)92); iter-=10; TS_ASSERT_EQUALS(v.end() - iter, (int)102); std::vector::const_iterator iter2 = iter; iter2 += 10; TS_ASSERT_EQUALS(iter2 - iter, (int)10); } }; ================================================ FILE: tests/engine_terminator_bench.cxx ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #define RING_SIZE 200 #define NUM_ITERATIONS 1000 typedef graphlab::graph graph_type; class increment_update : public graphlab::iupdate_functor { public: void operator()(icontext_type& context) { ++context.vertex_data(); foreach(edge_type edge, context.out_edges()) { const vertex_id_type nbr_id = edge.target(); if (context.const_vertex_data(nbr_id) < NUM_ITERATIONS) { context.schedule(nbr_id, *this); } } } }; // end of shortest path update functor void make_graph(graph_type &graph) { for (size_t i = 0;i < RING_SIZE; ++i) { graph.add_vertex(0); } for (size_t i = 0;i < RING_SIZE; ++i) { graph.add_edge(i, (i+1) % RING_SIZE, 0); } } class EngineTerminatorTestSuite: public CxxTest::TestSuite { public: void test_engine_terminator() { // Create a graphlab core graphlab::core core; make_graph(core.graph()); core.graph().finalize(); for (size_t ncpus = 1; ncpus <= 8; ++ncpus) { core.set_ncpus(ncpus); core.set_scope_type("edge"); core.schedule(0, increment_update()); const double runtime = core.start(); std::cout << ncpus << " Procs: " << runtime << std::endl; // check the graph and reset it for (size_t i = 0;i < RING_SIZE; ++i) { if (core.graph().vertex_data(i) != (size_t)NUM_ITERATIONS) { std::cout << "vertex " << i << " "; TS_ASSERT_EQUALS(core.graph().vertex_data(i), (size_t)NUM_ITERATIONS); } core.graph().vertex_data(i) = 0; } } } }; ================================================ FILE: tests/fiber_test.cpp ================================================ #include #include #include using namespace graphlab; int numticks = 0; void threadfn() { timer ti; ti.start(); while(1) { if (ti.current_time() >= 1) break; fiber_control::yield(); __sync_fetch_and_add(&numticks, 1); } } void threadfn2() { timer ti; ti.start(); while(1) { if (ti.current_time() >= 2) break; fiber_control::yield(); __sync_fetch_and_add(&numticks, 2); } } int main(int argc, char** argv) { timer ti; 
ti.start();
// Launch 100k fibers into each group; threadfn spins ~1s and threadfn2 ~2s,
// each bumping the shared numticks counter once per yield.
fiber_group group;
fiber_group group2;
for (int i = 0;i < 100000; ++i) {
  group.launch(threadfn);
  group2.launch(threadfn2);
}
group.join();
std::cout << "Completion in " << ti.current_time() << "s\n";
std::cout << "Context Switches: " << numticks << "\n";
group2.join();
std::cout << "Completion in " << ti.current_time() << "s\n";
std::cout << "Context Switches: " << numticks << "\n";
}


================================================
FILE: tests/fibo_fiber_test.cpp
================================================
#include #include #include
using namespace graphlab;

// Promise-like record handed to a fibonacci fiber: `argument` is the input,
// `result` the output, `result_set` (guarded by `lock`) signals completion,
// and `parent_tid` (0 == no parent) is the fiber to reschedule when done.
struct fibonacci_compute_promise {
  mutex* lock;
  size_t argument;
  size_t result;
  size_t parent_tid;
  bool result_set;
};

// Computes fib(promise->argument) recursively by spawning two child fibers
// and descheduling this fiber until both children have posted results.
void fibonacci(fibonacci_compute_promise* promise) {
  //std::cout << promise->argument << "\n";
  if (promise->argument == 1 || promise->argument == 2) {
    // base cases: fib(1) = fib(2) = 1
    promise->result = 1;
  } else {
    // recursive case
    mutex lock;
    fibonacci_compute_promise left, right;
    left.lock = &lock;
    left.argument = promise->argument - 1;
    left.result_set = false;
    left.parent_tid = fiber_control::get_tid();
    right.lock = &lock;
    right.argument = promise->argument - 2;
    right.result_set = false;
    right.parent_tid = fiber_control::get_tid();
    fiber_control::get_instance().launch(boost::bind(fibonacci, &left));
    fiber_control::get_instance().launch(boost::bind(fibonacci, &right));
    // wait on the left and right promise
    lock.lock();
    while (left.result_set == false || right.result_set == false) {
      // NOTE(review): deschedule_self takes the raw mutex — presumably it
      // releases it while parked, since we re-lock on wakeup; confirm against
      // fiber_control's contract.
      fiber_control::deschedule_self(&lock.m_mut);
      lock.lock();
    }
    lock.unlock();
    assert(left.result_set);
    assert(right.result_set);
    promise->result = left.result + right.result;
  }
  // Publish the result under the promise's lock, then wake the parent (if any).
  promise->lock->lock();
  promise->result_set = true;
  if (promise->parent_tid) fiber_control::schedule_tid(promise->parent_tid);
  promise->lock->unlock();
}

int main(int argc, char** argv) {
  timer ti;
  ti.start();
  fibonacci_compute_promise promise;
  mutex lock;
  promise.lock = &lock;
  promise.result_set = false;
  promise.argument = 24;
  promise.parent_tid = 0;
fiber_control::get_instance().launch(boost::bind(fibonacci, &promise)); fiber_control::get_instance().join(); assert(promise.result_set); std::cout << "Fib(" << promise.argument << ") = " << promise.result << "\n"; std::cout << "Completion in " << ti.current_time() << "s\n"; std::cout << fiber_control::get_instance().total_threads_created() << " threads created\n"; } ================================================ FILE: tests/hdfs_test.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include int main(int argc, char **argv) { { graphlab::hdfs hdfs; const bool write = true; graphlab::hdfs::fstream file(hdfs, "/tmp/joeytest.txt", write); file.good(); file << "Hello World\n"; file.close(); std::vector files = hdfs.list_files("/tmp/"); for(size_t i = 0; i < files.size(); ++i) std::cout << files[i] << std::endl; } { graphlab::hdfs hdfs; graphlab::hdfs::fstream file(hdfs, "/tmp/joeytest.txt"); file.good(); std::string answer; std::getline(file, answer); std::cout << "contents: " << std::endl; std::cout << answer << std::endl; file.close(); } std::cout << "Done!" 
<< std::endl; } ================================================ FILE: tests/hopscotch_test.cpp ================================================ #include #include #include #include #include #include #include #include #include boost::unordered_map um2; graphlab::hopscotch_map cm2; void hopscotch_map_sanity_checks() { const size_t NINS = 1500000; ASSERT_TRUE(cm2.begin() == cm2.end()); for (size_t i = 0;i < NINS; ++i) { cm2[17 * i] = i; um2[17 * i] = i; } for (size_t i = 0;i < NINS; ++i) { assert(cm2[17 * i] == i); assert(um2[17 * i] == i); } assert(cm2.size() == NINS); assert(um2.size() == NINS); for (size_t i = 0;i < NINS; i+=2) { cm2.erase(17*i); um2.erase(17*i); } for (size_t i = 0;i < NINS; i+=2) { assert(cm2.count(17*i) == i % 2); assert(um2.count(17*i) == i % 2); if (cm2.count(17*i)) { assert(cm2.find(17*i)->second == i); } } assert(cm2.size() == NINS / 2); assert(um2.size() == NINS / 2); typedef graphlab::hopscotch_map::value_type vpair; { size_t cnt = 0; foreach(vpair &v, cm2) { ASSERT_EQ(v.second, um2[v.first]); ++cnt; } ASSERT_EQ(cnt, NINS / 2); } { size_t cnt = 0; foreach(const vpair &v, cm2) { ASSERT_EQ(v.second, um2[v.first]); ++cnt; } ASSERT_EQ(cnt, NINS / 2); } std::stringstream strm; graphlab::oarchive oarc(strm); oarc << cm2; strm.flush(); cm2.clear(); ASSERT_EQ(cm2.size(), 0); graphlab::iarchive iarc(strm); iarc >> cm2; ASSERT_EQ(cm2.size(), NINS / 2); } struct bad_hasher { size_t operator()(uint32_t a) const { return 1; } }; void hopscotch_high_collision_sanity_checks() { const size_t NINS = 15000; boost::unordered_map um2; graphlab::hopscotch_map cm2; ASSERT_TRUE(cm2.begin() == cm2.end()); for (size_t i = 0;i < NINS; ++i) { cm2[17 * i] = i; um2[17 * i] = i; } for (size_t i = 0;i < NINS; ++i) { assert(cm2[17 * i] == i); assert(um2[17 * i] == i); } assert(cm2.size() == NINS); assert(um2.size() == NINS); for (size_t i = 0;i < NINS; i+=2) { cm2.erase(17*i); um2.erase(17*i); } for (size_t i = 0;i < NINS; i+=2) { assert(cm2.count(17*i) == i % 2); 
assert(um2.count(17*i) == i % 2); if (cm2.count(17*i)) { assert(cm2.find(17*i)->second == i); } } assert(cm2.size() == NINS / 2); assert(um2.size() == NINS / 2); typedef graphlab::hopscotch_map::value_type vpair; { size_t cnt = 0; foreach(vpair &v, cm2) { ASSERT_EQ(v.second, um2[v.first]); ++cnt; } ASSERT_EQ(cnt, NINS / 2); } { size_t cnt = 0; foreach(const vpair &v, cm2) { ASSERT_EQ(v.second, um2[v.first]); ++cnt; } ASSERT_EQ(cnt, NINS / 2); } std::stringstream strm; graphlab::oarchive oarc(strm); oarc << cm2; strm.flush(); cm2.clear(); ASSERT_EQ(cm2.size(), 0); graphlab::iarchive iarc(strm); iarc >> cm2; ASSERT_EQ(cm2.size(), NINS / 2); } void benchmark() { graphlab::timer ti; size_t NUM_ELS = 10000000; std::vector v; uint32_t u = 0; for (size_t i = 0;i < NUM_ELS; ++i) { v.push_back(u); u += 1 + rand() % 8; } std::random_shuffle(v.begin(), v.end()); graphlab::memory_info::print_usage(); { boost::unordered_map um; ti.start(); for (size_t i = 0;i < NUM_ELS; ++i) { um[v[i]] = i; } std::cout << NUM_ELS / 1000000 << "M unordered map inserts in " << ti.current_time() << " (Load factor = " << um.load_factor() << ")" << std::endl; graphlab::memory_info::print_usage(); ti.start(); for (size_t i = 0;i < 10000000; ++i) { size_t t = um[v[i]]; assert(t == i); } std::cout << "10M unordered map successful probes in " << ti.current_time() << std::endl; um.clear(); } { graphlab::cuckoo_map_pow2 cm(-1, 128); //cm.reserve(102400); ti.start(); for (size_t i = 0;i < NUM_ELS; ++i) { cm[v[i]] = i; // if (i % 1000000 == 0) std::cout << cm.load_factor() << std::endl; } std::cout << NUM_ELS / 1000000 << "M cuckoo map pow2 inserts in " << ti.current_time() << " (Load factor = " << cm.load_factor() << ")" << std::endl; graphlab::memory_info::print_usage(); ti.start(); for (size_t i = 0;i < 10000000; ++i) { size_t t = cm[v[i]]; assert(t == i); } std::cout << "10M cuckoo map pow2 successful probes in " << ti.current_time() << std::endl; } { graphlab::hopscotch_map cm; ti.start(); for (size_t 
i = 0;i < NUM_ELS; ++i) { cm[v[i]] = i; // if (i % 1000000 == 0) std::cout << cm.load_factor() << std::endl; } std::cout << NUM_ELS / 1000000 << "M hopscotch inserts in " << ti.current_time() << " (Load factor = " << cm.load_factor() << ")" << std::endl; graphlab::memory_info::print_usage(); ti.start(); for (size_t i = 0;i < 10000000; ++i) { size_t t = cm[v[i]]; assert(t == i); } std::cout << "10M hopscotch successful probes in " << ti.current_time() << std::endl; } } int main(int argc, char** argv) { std::cout << "Hopscotch Map Sanity Checks... \n"; hopscotch_map_sanity_checks(); std::cout << "Hopscotch High Collision Sanity Checks... \n"; hopscotch_high_collision_sanity_checks(); std::cout << "Map Benchmarks... \n"; benchmark(); std::cout << "Done" << std::endl; } ================================================ FILE: tests/local_graph_test.cxx ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ // standard C++ headers #include #include // includes the entire graphlab framework #include #include #include #include /** * Unit test for graphlab::local_graph.hpp */ class local_graph_test : public CxxTest::TestSuite { public: struct vertex_data { size_t value; vertex_data() : value(0) { } vertex_data(size_t n) : value(n) { } }; struct edge_data { int from; int to; edge_data (int f = 0, int t = 0) : from(f), to(t) {} }; /** * Test add vertex and add edges */ void test_add_vertex() { graphlab::local_graph g; test_add_vertex_impl(g, 100); test_add_vertex_impl(g, 10000); test_add_vertex_impl(g, 100000); std::cout << "\n+ Pass test: graph add vertex. :) \n"; graphlab::dynamic_local_graph g2; test_add_vertex_impl(g2, 100); test_add_vertex_impl(g2, 10000); test_add_vertex_impl(g2, 100000); std::cout << "\n+ Pass test: dynamic graph add vertex. :) \n"; } void test_add_edge() { graphlab::local_graph g; test_add_edge_impl(g, 100); test_add_edge_impl(g, 10000); test_add_edge_impl(g, 100000); std::cout << "\n+ Pass test: graph add edge. :) \n"; graphlab::dynamic_local_graph g2; test_add_edge_impl(g2, 100); test_add_edge_impl(g2, 10000); test_add_edge_impl(g2, 100000); std::cout << "\n+ Pass test: dynamic graph add edge. :) \n"; } void test_dynamic_add_edge() { graphlab::dynamic_local_graph g2; test_add_edge_impl(g2, 100, true); // add edge dynamically test_add_edge_impl(g2, 10000, true); test_add_edge_impl(g2, 100000, true); std::cout << "\n+ Pass test: graph dynamicly add edge. 
:) \n"; } void test_powerlaw_graph() { graphlab::local_graph g; graphlab::dynamic_local_graph g2; test_powerlaw_graph_impl(g, 100); // add edge (powerlaw) test_powerlaw_graph_impl(g, 10000); test_powerlaw_graph_impl(g2, 100); // add edge (powerlaw) test_powerlaw_graph_impl(g2, 10000); test_powerlaw_graph_impl(g2, 100, true); // add edge (powerlaw) dynamically test_powerlaw_graph_impl(g2, 10000, true); std::cout << "\n+ Pass test: powerlaw graph add edge. :) \n"; } void test_edge_case() { graphlab::local_graph g; test_edge_case_impl(g); std::cout << "\n+ Pass test: edge case test. :) \n"; graphlab::dynamic_local_graph g2; test_edge_case_impl(g2); std::cout << "\n+ Pass test: dynamic graph edge case test. :) \n"; } void test_sparse_graph() { graphlab::local_graph g; test_sparse_graph_impl(g); std::cout << "\n+ Pass test: sparse graph test. :) \n"; graphlab::dynamic_local_graph g2; test_sparse_graph_impl(g2); std::cout << "\n+ Pass test: sparse dyanmic graph test. :) \n"; } void test_grid_graph() { graphlab::local_graph g; test_grid_graph_impl(g); std::cout << "\n+ Pass test: grid graph test. :) \n"; graphlab::dynamic_local_graph g2; test_grid_graph_impl(g2); std::cout << "\n+ Pass test: grid dynamic graph test. :) \n"; } private: template void test_add_vertex_impl(Graph& g, size_t nverts) { g.clear(); ASSERT_EQ(g.num_vertices(), 0); for (size_t i = 0; i < nverts; ++i) { g.add_vertex(i, vertex_data(i)); } ASSERT_EQ(g.num_vertices(), nverts); for (size_t i = 0; i < g.num_vertices(); ++i) { ASSERT_EQ(g.vertex(i).data().value, i); } g.finalize(); ASSERT_EQ(g.num_vertices(), nverts); // graph should still support adding vertices after finalization // add more vertices and override existing vertex values for (size_t i = 0; i < 2*nverts; ++i) { g.add_vertex(i, vertex_data(i*2)); } ASSERT_EQ(g.num_vertices(), 2*nverts); for (size_t i = 0; i < g.num_vertices(); ++i) { ASSERT_EQ(g.vertex(i).data().value, 2*i); } } /** * Helper function to check the in/out edges of the graph. 
*/ template void check_adjacency(Graph& g, boost::unordered_map >& in_edges, boost::unordered_map >& out_edges, size_t nedges) { typedef typename Graph::edge_list_type edge_list_type; typedef typename Graph::edge_type edge_type; typedef typename Graph::vertex_type vertex_type; typedef typename Graph::vertex_id_type vertex_id_type; // check size ASSERT_EQ(g.num_edges(), nedges); size_t nedges_actual = 0; // check out edges typedef typename boost::unordered_map >::iterator iter_type; for (iter_type it = out_edges.begin(); it != out_edges.end(); ++it) { vertex_id_type src = it->first; std::set dst_expected = std::set(it->second.begin(), it->second.end()); const edge_list_type& ls = g.out_edges(src); foreach (const edge_type& e, ls) { ASSERT_EQ(e.source().id(), src); ASSERT_TRUE(dst_expected.count(e.target().id()) == 1); dst_expected.erase(e.target().id()); } nedges_actual += ls.size(); } ASSERT_EQ(nedges_actual, g.num_edges()); ASSERT_EQ(nedges_actual, nedges); nedges_actual = 0; // check in edges for (iter_type it = in_edges.begin(); it != in_edges.end(); ++it) { vertex_id_type dst = it->first; std::set src_expected = std::set(it->second.begin(), it->second.end()); const edge_list_type& ls = g.in_edges(dst); foreach (const edge_type& e, ls) { ASSERT_EQ(e.target().id(), dst); ASSERT_TRUE(src_expected.count(e.source().id()) == 1); src_expected.erase(e.source().id()); } nedges_actual += ls.size(); } ASSERT_EQ(nedges_actual, g.num_edges()); ASSERT_EQ(nedges_actual, nedges); } template void check_edge_data(Graph& g) { typedef typename Graph::edge_list_type edge_list_type; typedef typename Graph::edge_type edge_type; typedef typename Graph::vertex_type vertex_type; typedef typename Graph::vertex_id_type vertex_id_type; for (size_t i = 0; i < g.num_vertices(); ++i) { const edge_list_type& in_edges = g.in_edges(i); foreach (const edge_type& e, in_edges) { ASSERT_EQ(e.data().from, e.source().id()); ASSERT_EQ(e.data().to, e.target().id()); } const edge_list_type& out_edges = 
g.out_edges(i); foreach (const edge_type& e, out_edges) { ASSERT_EQ(e.data().from, e.source().id()); ASSERT_EQ(e.data().to, e.target().id()); } } } template void test_add_edge_impl(Graph& g, size_t nedges, bool use_dynamic=false) { typedef typename Graph::vertex_id_type vertex_id_type; srand(0); g.clear(); ASSERT_EQ(g.num_edges(), 0); boost::unordered_map > out_edges; boost::unordered_map > in_edges; boost::unordered_set< std::pair > all_edges; while (all_edges.size() < nedges) { vertex_id_type src = rand() % (int)(3*sqrt(nedges)); vertex_id_type dst = rand() % (int)(3*sqrt(nedges)); if (src == dst) continue; std::pair pair(src, dst); if (!all_edges.count(pair)) { all_edges.insert(pair); if (!out_edges.count(src)) { out_edges[src] = std::vector(); } if (!in_edges.count(dst)) { in_edges[dst] = std::vector(); } in_edges[dst].push_back(src); out_edges[src].push_back(dst); } } typedef typename boost::unordered_set< std::pair >::value_type pair_type; size_t count = 0; foreach (const pair_type& p, all_edges) { g.add_edge(p.first, p.second, edge_data(p.first, p.second)); ++count; if (use_dynamic && (all_edges.size()/5) == 0) { g.finalize(); } } if (!use_dynamic) ASSERT_EQ(g.num_edges(), 0); g.finalize(); check_adjacency(g, in_edges, out_edges, all_edges.size()); check_edge_data(g); } template void test_edge_case_impl(Graph& g) { // TODO: // self edges // duplicate edges std::cout << "Warning: test not implemented" << std::endl; } /** * Construct a star like sparse graph and test the in/out neighbors. */ template void test_sparse_graph_impl (Graph& g) { typedef typename Graph::edge_list_type edge_list_type; typedef typename Graph::edge_type edge_type; typedef typename Graph::vertex_type vertex_type; typedef typename Graph::vertex_id_type vertex_id_type; size_t num_v = 10; size_t num_e = 6; for (size_t i = 0; i < num_v; ++i) { vertex_data vdata; g.add_vertex(vertex_id_type(i), vdata); } /** * Create a star graph. 
*/ g.add_edge(1,3,edge_data(1,3)); g.add_edge(2,3,edge_data(2,3)); g.add_edge(4,3,edge_data(4,3)); g.add_edge(5,3,edge_data(5,3)); g.add_edge(3,2, edge_data(3,2)); g.add_edge(3,5, edge_data(3,5)); g.finalize(); ASSERT_EQ(g.num_vertices(), num_v); ASSERT_EQ(g.num_edges(), num_e); /** * Test number of in/out edges. */ for (vertex_id_type i = 0; i < 6; ++i) { edge_list_type inedges = g.in_edges(i); edge_list_type outedges = g.out_edges(i); size_t arr_insize[] = {0,0,1,4,0,1}; size_t arr_outsize[] = {0,1,1,2,1,1}; if (i != 3) { ASSERT_EQ(inedges.size(), arr_insize[i]); ASSERT_EQ(outedges.size(), arr_outsize[i]); if (outedges.size() > 0) { ASSERT_EQ(outedges[0].source().id(), i); ASSERT_EQ(outedges[0].target().id(), 3); edge_data data = (outedges[0]).data(); ASSERT_EQ(data.from, i); ASSERT_EQ(data.to, 3); } } else { std::set out_neighbors; out_neighbors.insert(5); out_neighbors.insert(2); ASSERT_EQ(outedges.size(), out_neighbors.size()); for (size_t j = 0; j < 2; ++j) { edge_data data = (outedges[j]).data(); ASSERT_EQ(data.from, 3); ASSERT_TRUE(out_neighbors.count(data.to) == 1); out_neighbors.erase(data.to); } std::set in_neighbors; in_neighbors.insert(5); in_neighbors.insert(4); in_neighbors.insert(2); in_neighbors.insert(1); ASSERT_EQ(inedges.size(), in_neighbors.size()); for (size_t j = 0; j < 4; ++j) { edge_data data = (inedges[j]).data(); ASSERT_EQ(data.to, 3); ASSERT_TRUE(in_neighbors.count(data.from) == 1); in_neighbors.erase(data.from); } } } for (vertex_id_type i = 6; i < num_v; ++i) { edge_list_type inedges = g.in_edges(i); edge_list_type outedges = g.out_edges(i); ASSERT_EQ(0, inedges.size()); ASSERT_EQ(0, outedges.size()); } } /** In this function, we construct the 3 by 3 grid graph. 
*/ template void test_grid_graph_impl(Graph& g, bool verbose = false) { typedef typename Graph::edge_list_type edge_list_type; typedef typename Graph::edge_type edge_type; typedef typename Graph::vertex_type vertex_type; typedef typename Graph::vertex_id_type vertex_id_type; g.clear(); if (verbose) std::cout << "-----------Begin Grid Test: ID Accessors--------------------" << std::endl; size_t dim = 3; size_t num_vertices = 0; size_t num_edge = 0; // here we create dim * dim vertices. for (size_t i = 0; i < dim * dim; ++i) { // create the vertex data, randomizing the color vertex_data vdata; vdata.value = 0; // create the vertex g.add_vertex(vertex_id_type(i), vdata); ++num_vertices; } // create the edges. The add_edge(i,j,edgedata) function creates // an edge from i->j. with the edgedata attached. edge_data edata; for (size_t i = 0;i < dim; ++i) { for (size_t j = 0;j < dim - 1; ++j) { // add the horizontal edges in both directions // g.add_edge(dim * i + j, dim * i + j + 1, edge_data(dim*i+j, dim*i+j+1)); g.add_edge(dim * i + j + 1, dim * i + j, edge_data(dim*i+j+1, dim*i+j)); // add the vertical edges in both directions g.add_edge(dim * j + i, dim * (j + 1) + i, edge_data(dim*j+i, dim*(j+1)+i)); g.add_edge(dim * (j + 1) + i, dim * j + i, edge_data(dim*(j+1)+i, dim*j+i)); num_edge += 4; } } // the graph is now constructed // we need to call finalize. 
g.finalize(); if (verbose) printf("Test num_vertices()...\n"); ASSERT_EQ(g.num_vertices(), num_vertices); if (verbose) printf("+ Pass test: num_vertices :)\n\n"); if (verbose) printf("Test num_edges()...\n"); ASSERT_EQ(g.num_edges(), num_edge); if (verbose) printf("+ Pass test: num_edges :)\n\n"); // Symmetric graph: #inneighbor == outneighbor if (verbose) printf("Test num_in_neighbors() == num_out_neighbors() ...\n"); for (size_t i = 0; i < num_vertices; ++i) { ASSERT_EQ(g.in_edges(i).size(), g.vertex(i).num_in_edges()); ASSERT_EQ(g.out_edges(i).size(), g.vertex(i).num_out_edges()); ASSERT_EQ(g.in_edges(i).size(), g.out_edges(i).size()); } ASSERT_EQ(g.in_edges(4).size(), 4); ASSERT_EQ(g.in_edges(0).size(), 2); if (verbose) printf("+ Pass test: #in = #out...\n\n"); if (verbose) printf("Test iterate over in/out_edges and get edge data: \n"); for (vertex_id_type i = 0; i < num_vertices; ++i) { const edge_list_type& out_edges = g.out_edges(i); const edge_list_type& in_edges = g.in_edges(i); if (verbose) { std::cout << "Test v: " << i << "\n" << "In edge ids: "; foreach(edge_type edge, in_edges) std::cout << "(" << edge.data().from << "," << edge.data().to << ") "; std::cout < void test_powerlaw_graph_impl(Graph& g, size_t nverts, bool use_dynamic = false, double alpha = 2.1) { graphlab::random::seed(0); g.clear(); typedef typename Graph::edge_list_type edge_list_type; typedef typename Graph::edge_type edge_type; typedef typename Graph::vertex_type vertex_type; typedef typename Graph::vertex_id_type vertex_id_type; boost::unordered_map > out_edges; boost::unordered_map > in_edges; boost::unordered_set< std::pair > all_edges; // construct powerlaw out degree distribution std::vector prob(nverts, 0); for(size_t i = 0; i < prob.size(); ++i) prob[i] = std::pow(double(i+1), -alpha); graphlab::random::pdf2cdf(prob); vertex_id_type dst = 0; // A large prime number const size_t HASH_OFFSET = 2654435761; // construct powerlaw graph with no dup edges for(vertex_id_type src = 0; 
src < nverts; ++src) { const size_t out_degree = graphlab::random::multinomial_cdf(prob) + 1; for(size_t i = 0; i < out_degree; ++i) { dst = (dst + HASH_OFFSET) % nverts; while (src == dst) { dst = (dst + HASH_OFFSET) % nverts; } std::pair pair(src, dst); if (!all_edges.count(pair)) { all_edges.insert(pair); if (!out_edges.count(src)) { out_edges[src] = std::vector(); } if (!in_edges.count(dst)) { in_edges[dst] = std::vector(); } in_edges[dst].push_back(src); out_edges[src].push_back(dst); } } } typedef typename boost::unordered_set< std::pair >::value_type pair_type; size_t count = 0; foreach (const pair_type& p, all_edges) { g.add_edge(p.first, p.second, edge_data(p.first, p.second)); ++count; if (use_dynamic && count % (all_edges.size()/5) == 0) { g.finalize(); } } if (!use_dynamic) ASSERT_EQ(g.num_edges(), 0); g.finalize(); check_adjacency(g, in_edges, out_edges, all_edges.size()); check_edge_data(g); } }; #include ================================================ FILE: tests/lock_free_pushback.cxx ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include using namespace graphlab; std::vector vec; lockfree_push_back > pusher(vec, 0); void testthread(size_t range0, size_t range1) { for (size_t i = range0;i < range1; ++i) { pusher.push_back(i); } } class LockFreePushBack : public CxxTest::TestSuite { public: void test_lockfree_push_back(void) { thread_group thr; for (size_t i = 0;i < 16; ++i) { thr.launch(boost::bind(testthread, i * 100000, (i+1) * 100000)); } thr.join(); TS_ASSERT_EQUALS(pusher.size(), (size_t)16 * 100000); vec.resize(pusher.size()); std::sort(vec.begin(), vec.end()); for (size_t i = 0;i < vec.size(); ++i) { TS_ASSERT_EQUALS(vec[i], i); } } }; ================================================ FILE: tests/mini_web_server.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include static void *callback(enum mg_event event, struct mg_connection *conn, const struct mg_request_info *request_info) { if (event == MG_NEW_REQUEST) { assert(request_info != NULL); const std::string url = (request_info->uri == NULL)? std::string("/") : std::string(request_info->uri) ; const std::string query = (request_info->query_string == NULL)? 
std::string("") : std::string(request_info->query_string) ; std::string response = "

URL: (" + url + ")

    "; std::map map = graphlab::web_util::parse_query(query); typedef std::map::value_type pair_type; foreach(pair_type pair, map) response += "
  • " + pair.first + " -- " + pair.second + "
  • "; response += "
"; mg_printf(conn, "HTTP/1.1 200 OK\r\n" "Content-Type: text/html\r\n" "Content-Length: %d\r\n" // Always set Content-Length "\r\n" "%s", int(response.size()), response.c_str()); // Mark as processed return (void*)(1); } else { return NULL; } } int main(void) { struct mg_context *ctx; const char *options[] = {"listening_ports", "8080", NULL}; ctx = mg_start(&callback, NULL, options); getchar(); // Wait until user hits "enter" mg_stop(ctx); return 0; } //// Using lib event // #include // #include // #include // #include // #include // #include // #include // void generic_handler(struct evhttp_request *req, void *arg) // { // struct evbuffer *buf; // buf = evbuffer_new(); // if (buf == NULL) // err(1, "failed to create response buffer"); // for(size_t i = 0; i < 1000; ++i) { // evbuffer_add_printf(buf, "Requested: %s\n", evhttp_request_uri(req)); // } // evhttp_send_reply(req, HTTP_OK, "OK", buf); // } // int main(int argc, char **argv) // { // struct evhttp *httpd; // event_init(); // httpd = evhttp_start("0.0.0.0", 8080); // /* Set a callback for requests to "/specific". */ // /* evhttp_set_cb(httpd, "/specific", another_handler, NULL); */ // /* Set a callback for all other requests. */ // evhttp_set_gencb(httpd, generic_handler, NULL); // event_dispatch(); // /* Not reached in this code as it is now. */ // evhttp_free(httpd); // return 0; // } ================================================ FILE: tests/random_test.cxx ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include typedef double vertex_data_type; typedef double edge_data_type; template void uniform_speed(const size_t max_iter) { NumType sum(0); graphlab::timer ti; ti.start(); for(size_t i = 0; i < max_iter; ++i) { sum += (NumType)(graphlab::random::uniform(0, 10)); } double slow_time = ti.current_time(); ti.start(); for(size_t i = 0; i < max_iter; ++i) { sum += (NumType)(graphlab::random::fast_uniform(0, 10)); } double fast_time = ti.current_time(); std::cout << slow_time << ", " << fast_time << std::endl; } class thread_worker { public: std::vector values; void run() { namespace random = graphlab::random; for(size_t i = 0; i < values.size(); ++i) { values[i] = random::uniform(0,3); } } }; template std::ostream& operator<<(std::ostream& out, const std::vector& values) { out << "{"; for(size_t i = 0; i < values.size(); ++i) { out << values[i]; if(i + 1 < values.size()) out << ", "; } return out << "}"; } std::vector operator+(const std::vector& v1, const std::vector& v2) { assert(v1.size() == v2.size()); std::vector result(v1.size()); for(size_t i = 0; i < result.size(); ++i) { result[i] = v1[i] + v2[i]; } return result; } class RandomTestSuite: public CxxTest::TestSuite { size_t iterations; public: RandomTestSuite() : iterations(1E8) { } void test_nondet_generator() { graphlab::random::nondet_seed(); graphlab::random::nondet_seed(); graphlab::random::nondet_seed(); } void test_random_number_generators() { std::cout << std::endl; std::cout << "beginning 
seed" << std::endl; namespace random = graphlab::random; graphlab::random::seed(); graphlab::random::time_seed(); graphlab::random::nondet_seed(); graphlab::random::seed(12345); std::cout << "finished" << std::endl; const size_t num_iterations(20); std::vector workers(10); for(size_t i = 0; i < workers.size(); ++i) workers[i].values.resize(num_iterations); graphlab::thread_group threads; for(size_t i = 0; i < workers.size(); ++i) { threads.launch(boost::bind(&thread_worker::run, &(workers[i]))); } threads.join(); for(size_t i = 0; i < workers.size(); ++i) { std::cout << workers[i].values << std::endl; } std::vector sum(workers[0].values.size()); for(size_t i = 0; i < workers.size(); ++i) { sum = sum + workers[i].values; } std::cout << "Result: " << sum << std::endl; } void test_shuffle() { namespace random = graphlab::random; random::nondet_seed(); std::vector numbers(100); for(size_t i = 0; i < numbers.size(); ++i) numbers[i] = (int)i + 1; for(size_t j = 0; j < 10; ++j) { // shuffle the numbers random::shuffle(numbers); std::cout << numbers << std::endl; } } // void test_speed() { // namespace random = graphlab::random; // std::cout << "speed test run: " << std::endl; // const size_t MAX_ITER(10000); // std::cout << "size_t: "; // uniform_speed(MAX_ITER); // std::cout << "int: "; // uniform_speed(MAX_ITER); // std::cout << "uint32_t: "; // uniform_speed(MAX_ITER); // std::cout << "uint16_t: "; // uniform_speed(MAX_ITER); // std::cout << "char: "; // uniform_speed(MAX_ITER); // std::cout << "float: "; // uniform_speed(MAX_ITER); // std::cout << "double: "; // uniform_speed(MAX_ITER); // std::cout << "gaussian: "; // double sum = 0; // graphlab::timer time; // time.start(); // for(size_t i = 0; i < MAX_ITER; ++i) // sum += random::gaussian(); // std::cout << time.current_time() << std::endl; // std::cout << "shuffle: "; // std::vector numbers(6); // for(size_t i = 0; i < numbers.size(); ++i) numbers[i] = (int)i + 1; // time.start(); // for(size_t j = 0; j < 
MAX_ITER/numbers.size(); ++j) { // // shuffle the numbers // random::shuffle(numbers); // } // std::cout << time.current_time() << ", "; // time.start(); // for(size_t j = 0; j < MAX_ITER/numbers.size(); ++j) { // // shuffle the numbers // std::random_shuffle(numbers.begin(), numbers.end()); // } // std::cout << time.current_time() << std::endl; // } }; ================================================ FILE: tests/runtests.sh ================================================ #!/bin/bash function quit_if_bad_retvalue { if [ $? -eq 0 ]; then echo "PASS" else echo "FAIL. Program returned with failure" exit 1 fi } function test_rpc_prog { echo "Testing $1 ..." echo "---------$1-------------" >> $stdoutfname echo "---------$1-------------" >> $stderrfname mpiexec -n 2 -host $localhostname ./$1 >> $stdoutfname 2>> $stderrfname if [ $? -ne 0 ]; then echo "FAIL. Program returned with failure" exit 1 fi str="mpiexec -n 2 -host $localhostname ./$1 2> /dev/null | grep \"$2\"" #echo $str e=`eval $str` if [ -z "$e" ] ; then echo "Expected program output not obtained" exit 1 fi } stdoutfname=$PWD/stdout.log stderrfname=$PWD/stderr.log echo $PWD | grep debug > /dev/null dbgpath=$? echo $PWD | grep release > /dev/null relpath=$? echo $PWD | grep profile > /dev/null propath=$? 
if [ $dbgpath -eq 1 ]; then if [ $relpath -eq 1 ]; then if [ $propath -eq 1 ]; then echo "This test must be run from either ./release/tests/, ./debug/tests/, or ./profile/tests/ in Graphlab root folder" echo "Please compile GraphLab first, using the instructions on http://graphlab.org/download.html and try again from the approprite folder" exit 1 fi fi fi rm -f $stdoutfname $stderrfname if [ $# -eq 0 ]; then echo "Running Standard unit tests" echo "===========================" ctest -O testlog.txt ./anytests ./anytests_loader # delete extra generated files rm -f dg* fi echo | tee -a $stdoutfname echo "Running application tests"| tee -a $stdoutfname echo "========================="| tee -a $stdoutfname echo "GraphLab collaborative filtering library"| tee -a $stdoutfname somefailed=0 if [ -f ../demoapps/pmf/pmf ]; then pushd . > /dev/null cd ../demoapps/pmf echo "---------PMF-------------" >> $stdoutfname OUTFILE=smalltest.out ./pmf --show_version=true if [ $? -eq 2 ]; then echo "detected Eigen based pmf"| tee -a $stdoutfname OUTFILE=smalltest_eigen.out else echo "detected it++ based pmf"| tee -a $stdoutfname fi echo "********************TEST1************************" >> $stdoutfname ./pmf --unittest 1 --ncpus=1 --debug=true >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 1 (Alternating least squares)"| tee -a $stdoutfname else somefailed=1 echo "FAIL --unittest=1"| tee -a $stdoutfname fi echo "********************TEST2************************" >> $stdoutfname ./pmf --unittest 71 --ncpus=1 --debug=true >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 2 (Lanczos)"| tee -a $stdoutfname else somefailed=1 echo "FAIL --unittest=71 (Lanczos)"| tee -a $stdoutfname fi echo "********************TEST4************************" >> $stdoutfname ./pmf --unittest 91 --ncpus=1 --debug=true >> $stdoutfname 2>& 1 if [ $? 
-eq 0 ]; then echo "PASS TEST 3 (Weighted ALS)"| tee -a $stdoutfname else somefailed=1 echo "FAIL --unittest=91 (weighted alternating least squares)"| tee -a $stdoutfname fi echo "********************TEST5************************" >> $stdoutfname ./pmf --unittest 101 --ncpus=1 >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 4 (CoSaMP)"| tee -a $stdoutfname else echo "FAIL --unittest=101 (CoSaMP)"| tee -a $stdoutfname somefailed=1 fi echo "********************TEST6************************" >> $stdoutfname ./pmf --unittest 131 >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 5 (SVD)"| tee -a $stdoutfname else echo "FAIL --unittest=131 (SVD)"| tee -a $stdoutfname somefailed=1 fi popd > /dev/null else echo "PMF not found. "| tee -a $stdoutfname fi echo echo "GraphLab clustring library"| tee -a $stdoutfname if [ -f ../demoapps/clustering/glcluster ]; then pushd . > /dev/null cd ../demoapps/clustering echo "---------CLUSTERING-------------" >> $stdoutfname echo "---------CLUSTERING-------------" >> $stderrfname echo "********************TEST1************************" >> $stdoutfname ./glcluster --unittest 1 $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 1 (Math functions)"| tee -a $stdoutfname else somefailed=1 echo "FAIL --unittest=1 (Math functions)"| tee -a $stdoutfname fi echo "********************TEST2************************" >> $stdoutfname ./glcluster --unittest 2 >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 2 (Distance functions)"| tee -a $stdoutfname else somefailed=1 echo "FAIL --unittest=2 (Distance functions)"| tee -a $stdoutfname fi echo "********************TEST3************************" >> $stdoutfname ./glcluster --unittest 4 >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 3 (Floating point math functions)"| tee -a $stdoutfname else somefailed=1 echo "FAIL --unittest=3 (Floating point math functions)"| tee -a $stdoutfname fi popd > /dev/null else echo "Clustering library not found. 
"| tee -a $stdoutfname fi echo | tee -a $stdoutfname echo "GraphLab Linear Solvers Library"| tee -a $stdoutfname if [ -f ../demoapps/gabp/gabp ]; then pushd . > /dev/null cd ../demoapps/gabp echo "---------GABP-------------" >> $stdoutfname echo "********************TEST1************************" >> $stdoutfname ./gabp --unittest=1 --ncpus=1 --debug=true >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 1 (GaBP non-square)"| tee -a $stdoutfname else somefailed=1 echo "FAIL --unittest=1 (GabP non-square)"| tee -a $stdoutfname fi echo "********************TEST2************************" >> $stdoutfname ./gabp --unittest=2 --ncpus=1 --debug=true >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 2 (GaBP square)"| tee -a $stdoutfname else somefailed=1 echo "FAIL --unittest 2 (GaBP square)" | tee -a $stdoutfname fi echo "********************TEST3************************" >> $stdoutfname ./gabp --unittest=3 --ncpus=1 --debug=true >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 3 (Jacobi)"| tee -a $stdoutfname else somefailed=1 echo "FAIL --unittest=3 (Jacobi)"| tee -a $stdoutfname fi echo "********************TEST4************************" >> $stdoutfname ./gabp --unittest=4 --ncpus=1 >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 4 (Conjugate Gradient - square)"| tee -a $stdoutfname else echo "FAIL --unittest=4 (Conjugate Gradient - square)"| tee -a $stdoutfname somefailed=1 fi echo "********************TEST5************************" >> $stdoutfname ./gabp --unittest=5 >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 5 (Conjugate Gradient - non square)"| tee -a $stdoutfname else echo "FAIL --unittest=5 (Conjugate Gradient- non square)"| tee -a $stdoutfname somefailed=1 fi echo "********************TEST6************************" >> $stdoutfname ./gabp --unittest=51 >> $stdoutfname 2>& 1 if [ $? 
-eq 0 ]; then echo "PASS TEST 6 (Conjugate Gradient, matrix market format)"| tee -a $stdoutfname else echo "FAIL --unittest=6 (Conjugate Gradient- matrix market)"| tee -a $stdoutfname somefailed=1 fi echo "********************TEST7************************" >> $stdoutfname ./gabp --unittest=21 >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 7 (GaBP, matrix market format, square, regularization)"| tee -a $stdoutfname else echo "FAIL --unittest=7 (gaBP, matrix market format, square, regularization)"| tee -a $stdoutfname somefailed=1 fi echo "********************TEST8************************" >> $stdoutfname ./gabp --unittest=22 >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 8 (Jacobi, matrix market format, symmetric)"| tee -a $stdoutfname else echo "FAIL --unittest=8 (Jacobi, matrix market format, symmetric)"| tee -a $stdoutfname somefailed=1 fi echo "********************TEST9************************" >> $stdoutfname ./gabp --unittest=23 >> $stdoutfname 2>& 1 if [ $? -eq 0 ]; then echo "PASS TEST 9 (Conjugate Gradient, matrix market format, symmetric)"| tee -a $stdoutfname else echo "FAIL --unittest=9 (Conjugate Gradient, matrix market, symmetric)"| tee -a $stdoutfname somefailed=1 fi popd > /dev/null else echo "Linear solver library not found. "| tee -a $stdoutfname fi echo if [ $somefailed == 1 ]; then echo "**** FAILURE LOG **************" >> $stdoutfname cat $stderrfname >> $stdoutfname echo "**** CONFIGURE.DEPS **************" >> $stdoutfname cat ../../configure.deps >> $stdoutfname echo "**** CONFIG.LOG **************" >> $stdoutfname cat ../../config.log >> $stdoutfname echo "**** SYSTEM STATS **************" >> $stdoutfname echo `date` >> $stdoutfname echo `uname -a` >> $stdoutfname echo `echo $USER` >> $stdoutfname echo "Some of the tests failed". echo "Please email stdout.log to danny.bickson@gmail.com" echo "Thanks for helping improve GraphLab!" fi if [ -f ../demoapps/demo/demo ]; then pushd . 
> /dev/null cd ../demoapps/demo echo "Demo..." echo "---------demo-------------" >> $stdoutfname echo "---------demo-------------" >> $stderrfname ./demo >> $stdoutfname 2>> $stderrfname quit_if_bad_retvalue popd > /dev/null else echo "demo not found. " fi echo echo "RPC Tests" echo "=========" echo "Testing for availability of an MPI daemon" localhostname=`hostname` mpdtrace if [ $? -eq 0 ]; then echo "MPI available" else echo "MPI not available. Distributed/RPC tests not running." exit 1 fi test_rpc_prog rpc_example1 "5 plus 1 is : 6\\|11 plus 1 is : 12" test_rpc_prog rpc_example2 "hello world!\\|1, 2, 1," test_rpc_prog rpc_example3 "1.a = 10\\|10.b = 0\\|string = hello world!" test_rpc_prog rpc_example4 "1.a = 10\\|10.b = 0\\|string = hello world!" test_rpc_prog rpc_example5 "1 + 2.000000 = three" test_rpc_prog rpc_example6 "10\\|15\\|hello world\\|10.5\\|10" test_rpc_prog rpc_example7 "set from 1\\|set from 1\\|set from 0\\|set from 0\\|set from 1\\|set from 1\\|set from 0\\|set from 0" echo echo "Distributed GraphLab Tests" echo "==========================" echo "Testing Distributed disk graph construction..." echo "---------distributed_dg_construction_test-------------" >> $stdoutfname echo "---------distributed_dg_construction_test-------------" >> $stderrfname mpiexec -n 2 -host $localhostname ./distributed_dg_construction_test >> $stdoutfname 2>> $stderrfname quit_if_bad_retvalue rm -f dg* echo "Testing Distributed Graph ..." echo "---------distributed_graph_test-------------" >> $stdoutfname echo "---------distributed_graph_test-------------" >> $stderrfname ./distributed_graph_test -g mpiexec -n 2 -host $localhostname ./distributed_graph_test -b >> $stdoutfname 2>> $stderrfname quit_if_bad_retvalue rm -f dg* ================================================ FILE: tests/scheduler_test.cxx ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include using namespace graphlab; distributed_control dc; struct message_type { int value; double pr; explicit message_type(int value = 0, double pr = 0):value(value),pr(pr) { } double priority() const { return pr; } message_type& operator+=(const message_type& other) { value += other.value; pr = other.value; return *this; } }; const size_t NCPUS = 4; const size_t NUM_VERTICES = 101; std::vector > correctness_counter; template void test_scheduler_basic_functionality_single_threaded() { graphlab_options opts; opts.set_ncpus(NCPUS); SchedulerType sched(NUM_VERTICES, opts); const size_t target_value = 100; // inject a sequence of messages which will sum to 100 per vertex for (size_t c = 0;c < target_value; ++c) { for (size_t i = 0; i < NUM_VERTICES; ++i) { sched.schedule(i, message_type(1, 1.0)); } } correctness_counter.clear(); correctness_counter.resize(NUM_VERTICES, atomic(0)); sched.start(); // pull stuff out bool allcpus_done = false; while(!allcpus_done) { allcpus_done = true; for (size_t i = 0; i < NCPUS; ++i) { vertex_id_type v; message_type m; sched_status::status_enum ret = sched.get_next(i, v, m); if (ret == sched_status::NEW_TASK) { allcpus_done = false; correctness_counter[v].inc(m.value); } } } // check the counters for(size_t i = 0; i < NUM_VERTICES; ++i) { 
TS_ASSERT_EQUALS(correctness_counter[i].value, (int)target_value); } } template void test_basic_functionality_thread(SchedulerType& sched, async_consensus& consensus, size_t schedule_count, size_t threadid) { size_t c = 0; vertex_id_type v; message_type m; while(1) { // process as many tasks as I can while(1) { sched_status::status_enum ret = sched.get_next(threadid, v, m); if (ret == sched_status::NEW_TASK) { correctness_counter[v].inc(m.value); } else { break; } } // schedule 1 cycle. If I schedule stuff I go back to processing tasks if (c < schedule_count) { for (size_t i = 0; i < NUM_VERTICES; ++i) { sched.schedule(i, message_type(1, 1.0)); consensus.cancel(); } ++c; continue; } // nothing to schedule, nothing to run. try to quit consensus.begin_done_critical_section(threadid); sched_status::status_enum ret = sched.get_next(threadid, v, m); if (ret == sched_status::NEW_TASK) { // there is task. cancel, process it, and look back consensus.cancel_critical_section(threadid); correctness_counter[v].inc(m.value); } else { // no more tasks try to finish up bool ret = consensus.end_done_critical_section(threadid); if (ret) break; } } } template void test_scheduler_basic_functionality_parallel() { graphlab_options opts; opts.set_ncpus(NCPUS); SchedulerType sched(NUM_VERTICES, opts); async_consensus consensus(dc, NCPUS); const size_t schedule_count = 10000; const size_t target_value = schedule_count * NCPUS + 1; correctness_counter.clear(); correctness_counter.resize(NUM_VERTICES, atomic(0)); // inject a sequence of messages which will sum to 100 per vertex for (size_t i = 0; i < NUM_VERTICES; ++i) { sched.schedule(i, message_type(1, 1.0)); } sched.start(); thread_group group; for (size_t i = 0;i < NCPUS;++i) { group.launch(boost::bind(test_basic_functionality_thread, boost::ref(sched), boost::ref(consensus), schedule_count, i)); } group.join(); // check the counters for(size_t i = 0; i < NUM_VERTICES; ++i) { TS_ASSERT_EQUALS(correctness_counter[i].value, 
(int)target_value); } } /* * Like test_basic_functionality_thread, but only increments the * correctness_counter by 1, and checks to make sure the priority is at least * 100.0 */ template void test_scheduler_min_priority_thread(SchedulerType& sched, async_consensus& consensus, size_t schedule_count, size_t threadid) { size_t c = 0; vertex_id_type v; message_type m; while(1) { // process as many tasks as I can while(1) { sched_status::status_enum ret = sched.get_next(threadid, v, m); if (ret == sched_status::NEW_TASK) { TS_ASSERT_LESS_THAN_EQUALS(100.0, m.priority()); correctness_counter[v].inc(1); } else { break; } } // schedule 1 cycle. If I schedule stuff I go back to processing tasks if (c < schedule_count) { for (size_t i = 0; i < NUM_VERTICES; ++i) { sched.schedule(i, message_type(1, 1.0)); consensus.cancel(); } ++c; continue; } // nothing to schedule, nothing to run. try to quit consensus.begin_done_critical_section(threadid); sched_status::status_enum ret = sched.get_next(threadid, v, m); if (ret == sched_status::NEW_TASK) { TS_ASSERT_LESS_THAN_EQUALS(100.0, m.priority()); // there is task. 
cancel, process it, and look back consensus.cancel_critical_section(threadid); correctness_counter[v].inc(1); } else { // no more tasks try to finish up bool ret = consensus.end_done_critical_section(threadid); if (ret) break; } } } template void test_scheduler_min_priority_parallel() { graphlab_options opts; opts.set_ncpus(NCPUS); opts.get_scheduler_args().set_option("min_priority", 100.0); SchedulerType sched(NUM_VERTICES, opts); async_consensus consensus(dc, NCPUS); const size_t schedule_count = 10000; const size_t maximum_value = (size_t)((schedule_count * NCPUS + 101.0) / 100.0); correctness_counter.clear(); correctness_counter.resize(NUM_VERTICES, atomic(0)); // inject a sequence of messages which will sum to 100 per vertex for (size_t i = 0; i < NUM_VERTICES; ++i) { sched.schedule(i, message_type(1, 101.0)); } sched.start(); thread_group group; for (size_t i = 0;i < NCPUS;++i) { group.launch(boost::bind(test_scheduler_min_priority_thread, boost::ref(sched), boost::ref(consensus), schedule_count, i)); } group.join(); // check the counters for(size_t i = 0; i < NUM_VERTICES; ++i) { TS_ASSERT_LESS_THAN_EQUALS(correctness_counter[i].value, (int)maximum_value); } } class SerializeTestSuite : public CxxTest::TestSuite { public: void test_scheduler_basic_single_threaded() { test_scheduler_basic_functionality_single_threaded >(); test_scheduler_basic_functionality_single_threaded >(); test_scheduler_basic_functionality_single_threaded >(); test_scheduler_basic_functionality_single_threaded >(); } void test_scheduler_basic_parallel() { test_scheduler_basic_functionality_parallel >(); test_scheduler_basic_functionality_parallel >(); test_scheduler_basic_functionality_parallel >(); test_scheduler_basic_functionality_parallel >(); } void test_scheduler_min_priority() { test_scheduler_min_priority_parallel >(); test_scheduler_min_priority_parallel >(); test_scheduler_min_priority_parallel >(); test_scheduler_min_priority_parallel >(); } }; 
================================================ FILE: tests/serializetests.cxx ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include #include #include #include #include using namespace graphlab; struct A{ int z; void save(oarchive &a) const { a << z; } void load(iarchive &a) { a >> z; } }; class TestClass{ public: int i; int j; std::vector k; A l; void save(oarchive &a) const { a << i << j << k << l; } void load(iarchive &a) { a >> i >> j >> k >> l; } }; struct pod_class_1: public graphlab::IS_POD_TYPE { size_t x; }; struct pod_class_2 { size_t x; }; SERIALIZABLE_POD(pod_class_2); class SerializeTestSuite : public CxxTest::TestSuite { public: // Look for the class TestClass() to see the most interesting tutorial on how to // use the serializer void test_basic_datatype(void) { char t1 = 'z'; bool t2 = true; int t3 = 10; int t4 = 18345; long t5 = 30921233; long long t6 = (long long)(t5)*100; float t7 = 10.35; double t8 = 3.14156; const char *t9 = "hello world"; const char * t10 = "blue"; graphlab::any t11; t11 = size_t(10); char r1; bool r2; int r3; int r4; long r5; long long r6; float r7; double r8; char r9[100]; char r10[10]; graphlab::any r11; // serialize t1-10 std::ofstream f; 
f.open("test.bin",std::fstream::binary); oarchive a(f); a << t1 << t2 << t3 << t4 << t5 << t6 << t7 << t8; serialize(a, t9, strlen(t9) + 1); serialize(a, t10, strlen(t10) + 1); a << t11; f.close(); // deserialize into r1-10 std::ifstream g; g.open("test.bin",std::fstream::binary); iarchive b(g); b >> r1 >> r2 >> r3 >> r4 >> r5 >> r6 >> r7 >> r8; deserialize(b, &r9, strlen(t9) + 1); deserialize(b, r10, strlen(t10) + 1); b >> r11; g.close(); TS_ASSERT_EQUALS(t1, r1); TS_ASSERT_EQUALS(t2, r2); TS_ASSERT_EQUALS(t3, r3); TS_ASSERT_EQUALS(t4, r4); TS_ASSERT_EQUALS(t5, r5); TS_ASSERT_EQUALS(t6, r6); TS_ASSERT_EQUALS(t7, r7); TS_ASSERT_EQUALS(t8, r8); TS_ASSERT_SAME_DATA(t9, r9, strlen(t9) + 1); TS_ASSERT_SAME_DATA(t10, r10, strlen(t10) + 1); TS_ASSERT_EQUALS(r11.as(), t11.as()); } void test_vector_serialization(void) { std::vector v; for (int i = 0;i< 10; ++i) { v.push_back(i); } std::ofstream f; f.open("test.bin",std::fstream::binary); oarchive a(f); a << v; f.close(); std::vector w; std::ifstream g; iarchive b(g); g.open("test.bin",std::fstream::binary); b >> w; g.close(); for (int i = 0;i< 10; ++i) { TS_ASSERT_EQUALS(v[i], w[i]); } } void test_class_serialization(void) { // create a test class TestClass t; t.i=10; t.j=20; t.k.push_back(30); //serialize std::ofstream f; f.open("test.bin",std::fstream::binary); oarchive a(f); a << t; f.close(); //deserialize into t2 TestClass t2; std::ifstream g; g.open("test.bin",std::fstream::binary); iarchive b(g); b >> t2; g.close(); // check TS_ASSERT_EQUALS(t.i, t2.i); TS_ASSERT_EQUALS(t.j, t2.j); TS_ASSERT_EQUALS(t.k.size(), t2.k.size()); TS_ASSERT_EQUALS(t.k[0], t2.k[0]); } void test_vector_of_classes(void) { // create a vector of test classes std::vector vt; vt.resize(10); for (int i=0;i<10;i++) { vt[i].i=i; vt[i].j=i*21; vt[i].k.resize(10); vt[i].k[i]=i*51; } //serialize std::ofstream f; f.open("test.bin",std::fstream::binary); oarchive a(f); a << vt; f.close(); //deserialize into vt2 std::vector vt2; std::ifstream g; 
g.open("test.bin",std::fstream::binary); iarchive b(g); b >> vt2; g.close(); // check TS_ASSERT_EQUALS(vt.size(), vt2.size()); for (size_t i=0;i<10;i++) { TS_ASSERT_EQUALS(vt[i].i, vt2[i].i); TS_ASSERT_EQUALS(vt[i].j, vt2[i].j); TS_ASSERT_EQUALS(vt[i].k.size(), vt2[i].k.size()); for (size_t j = 0; j < vt[i].k.size(); ++j) { TS_ASSERT_EQUALS(vt[i].k[j], vt2[i].k[j]); } } } void test_vector_of_strings(void) { std::string x = "Hello world"; std::string y = "This is a test"; std::vector v; v.push_back(x); v.push_back(y); std::ofstream f; f.open("test.bin",std::fstream::binary); oarchive a(f); a << v; f.close(); //deserialize into vt2 std::vector v2; std::ifstream g; g.open("test.bin",std::fstream::binary); iarchive b(g); b >> v2; g.close(); TS_ASSERT_EQUALS(v[0], v2[0]); TS_ASSERT_EQUALS(v[1], v2[1]); } void test_map_serialization(void) { std::map v; v["one"] = 1; v["two"] = 2; v["three"] = 3; std::ofstream f; f.open("test.bin",std::fstream::binary); oarchive a(f); a << v; f.close(); //deserialize into vt2 std::map v2; std::ifstream g; g.open("test.bin",std::fstream::binary); iarchive b(g); b >> v2; g.close(); TS_ASSERT_EQUALS(v["one"], v2["one"]); TS_ASSERT_EQUALS(v["two"], v2["two"]); TS_ASSERT_EQUALS(v["three"], v2["three"]); } void test_repeated_array_serialization(void) { typedef std::map intmap; std::vector buffer; std::vector sizes(5); std::cout << "Making maps =====================================" << std::endl; for(size_t i = 0; i < sizes.size(); ++i) { std::stringstream strm; oarchive arc(strm); intmap im; im[i] = i; im[10*i] = 10*i; if(i % 2 == 0) im[i+sizes.size()] = 3; for(intmap::const_iterator iter = im.begin(); iter != im.end(); ++iter) { std::cout << "[" << iter->first << ", " << iter->second << "]\t"; } std::cout << std::endl; arc << im; std::string str(strm.str()); sizes[i] = str.size(); int index = buffer.size(); buffer.resize(index + str.size()); memcpy(&buffer[index], str.c_str(), str.size()); } std::cout << "reading maps 
=====================================" << std::endl; namespace bio = boost::iostreams; typedef bio::stream icharstream; for(size_t i = 0, offset=0; i < sizes.size(); ++i) { icharstream strm(&buffer[offset], sizes[i]); offset += sizes[i]; intmap im; iarchive arc(strm); arc >> im; for(intmap::const_iterator iter = im.begin(); iter != im.end(); ++iter) { std::cout << "[" << iter->first << ", " << iter->second << "]\t"; } std::cout << std::endl; } } void test_boost_unordered_map(void) { boost::unordered_map m; m["hello"] = 1; m["world"] = 2; std::ofstream f; f.open("test.bin",std::fstream::binary); oarchive a(f); a << m; f.close(); boost::unordered_map m2; std::ifstream g; iarchive b(g); g.open("test.bin",std::fstream::binary); b >> m2; g.close(); TS_ASSERT_EQUALS(m["hello"], m2["hello"]); TS_ASSERT_EQUALS(m["world"], m2["world"]); } void test_boost_unordered_set(void) { boost::unordered_set m; m.insert("hello"); m.insert("world"); std::ofstream f; f.open("test.bin",std::fstream::binary); oarchive a(f); a << m; f.close(); boost::unordered_set m2; std::ifstream g; iarchive b(g); g.open("test.bin",std::fstream::binary); b >> m2; g.close(); TS_ASSERT(m2.find("hello") != m2.end()); TS_ASSERT(m2.find("world") != m2.end()); } void test_pod_method_1() { std::vector p1; for (size_t i = 0;i < 1000; ++i) { pod_class_1 p; p.x = i; p1.push_back(p); } std::ofstream f; f.open("test.bin",std::fstream::binary); oarchive a(f); a << p1; f.close(); std::vector p2; std::ifstream g; iarchive b(g); g.open("test.bin",std::fstream::binary); b >> p2; g.close(); for (size_t i = 0;i < 1000; ++i) { TS_ASSERT_EQUALS(p1[i].x, p2[i].x); } } void test_pod_method_2() { std::vector p1; for (size_t i = 0;i < 1000; ++i) { pod_class_2 p; p.x = i; p1.push_back(p); } std::ofstream f; f.open("test.bin",std::fstream::binary); oarchive a(f); a << p1; f.close(); std::vector p2; std::ifstream g; iarchive b(g); g.open("test.bin",std::fstream::binary); b >> p2; g.close(); for (size_t i = 0;i < 1000; ++i) { 
TS_ASSERT_EQUALS(p1[i].x, p2[i].x); } } }; ================================================ FILE: tests/sfinae_function_test.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include struct ts{ int i; }; void by_value(ts) { } void by_const_value(const ts) { } void by_reference(ts&) { } void by_const_reference(const ts&) { } struct functor_by_value{ void operator()(ts) { } }; struct functor_by_const_value{ void operator()(const ts) { } }; struct functor_by_reference{ void operator()(ts&) { } }; struct functor_by_const_reference{ void operator()(const ts&) { } }; struct const_functor_by_value{ void operator()(ts) const { } }; struct const_functor_by_const_value{ void operator()(const ts) const { } }; struct const_functor_by_reference{ void operator()(ts&) const { } }; struct const_functor_by_const_reference{ void operator()(const ts&) const { } }; struct overload_functor_by_value{ void operator()(ts) { } void operator()(ts) const { } }; struct overload_functor_by_const_value{ void operator()(const ts) { } void operator()(const ts) const { } }; struct overload_functor_by_reference{ void operator()(ts&) { } void operator()(ts&) const { } }; struct overload_functor_by_const_reference{ void operator()(const ts&) { } void operator()(const ts&) const { } }; /* * Returns true 
if T is a function which matches void(const ts&) * or if T is a functor with a void operator()(const ts&) const */ template int test_function_is_const_ref(T t) { return graphlab::test_function_or_const_functor_1::value; } int main(int argc, char** argv) { std::cout << test_function_is_const_ref(by_value) << std::endl; std::cout << test_function_is_const_ref(by_const_value) << std::endl; std::cout << test_function_is_const_ref(by_reference) << std::endl; std::cout << test_function_is_const_ref(by_const_reference) << std::endl; std::cout << test_function_is_const_ref(functor_by_value()) << std::endl; std::cout << test_function_is_const_ref(functor_by_const_value()) << std::endl; std::cout << test_function_is_const_ref(functor_by_reference()) << std::endl; std::cout << test_function_is_const_ref(functor_by_const_reference()) << std::endl; std::cout << test_function_is_const_ref(const_functor_by_value()) << std::endl; std::cout << test_function_is_const_ref(const_functor_by_const_value()) << std::endl; std::cout << test_function_is_const_ref(const_functor_by_reference()) << std::endl; std::cout << test_function_is_const_ref(const_functor_by_const_reference()) << std::endl; std::cout << test_function_is_const_ref(overload_functor_by_value()) << std::endl; std::cout << test_function_is_const_ref(overload_functor_by_const_value()) << std::endl; std::cout << test_function_is_const_ref(overload_functor_by_reference()) << std::endl; std::cout << test_function_is_const_ref(overload_functor_by_const_reference()) << std::endl; } ================================================ FILE: tests/small_map_test.cxx ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include using namespace graphlab; #include class test_small_map : public CxxTest::TestSuite { public: void test_lookup() { typedef small_map<32, size_t, double> map_type; map_type map; std::map stdmap; map[5] = stdmap[5] = 5.1; map[1] = stdmap[1] = 1.1; map[2] = stdmap[2] = 2.1; ASSERT_EQ(map.size(), stdmap.size()); std::cout << std::endl; std::cout << map << std::endl; typedef std::pair pair_type; foreach(pair_type pair, stdmap) { ASSERT_EQ(map[pair.first], pair.second); ASSERT_EQ(map.safe_find(pair.first), pair.second); ASSERT_TRUE(map.has_key(pair.first)); } foreach(pair_type pair, map) { ASSERT_EQ(stdmap[pair.first], pair.second); } map_type map2; std::map stdmap2; map2[0] = stdmap2[0] = 0.2; map2[5] = stdmap2[5] = 5.2; map2[2] = stdmap2[2] = 2.2; map2[1] = stdmap2[1] = 1.2; map2[8] = stdmap2[8] = 8.2; ASSERT_EQ(map2.size(), stdmap2.size()); map_type map3 = map + map2; std::map stdmap3 = graphlab::map_union(stdmap, stdmap2); std::cout << map3 << std::endl; foreach(pair_type pair, stdmap3) { ASSERT_EQ(map3[pair.first], pair.second); ASSERT_EQ(map3.safe_find(pair.first), pair.second); ASSERT_TRUE(map3.has_key(pair.first)); } foreach(pair_type pair, map3) { ASSERT_EQ(stdmap3[pair.first], pair.second); } } // void test_lookup() { // typedef small_map<32, size_t, std::string> map_type; // map_type map; // std::map stdmap; // map[5] = stdmap[5] = "five"; // map[1] = stdmap[1] = "one"; // map[2] = stdmap[2] = "two"; // 
ASSERT_EQ(map.size(), stdmap.size()); // std::cout << std::endl; // std::cout << map << std::endl; // typedef std::pair pair_type; // foreach(pair_type pair, stdmap) { // ASSERT_EQ(map[pair.first], pair.second); // ASSERT_EQ(map.safe_find(pair.first), pair.second); // ASSERT_TRUE(map.has_key(pair.first)); // } // foreach(pair_type pair, map) { // ASSERT_EQ(stdmap[pair.first], pair.second); // } // map_type map2; // std::map stdmap2; // map2[0] = stdmap2[0] = "ZERO"; // map2[5] = stdmap2[5] = "FIVE"; // map2[2] = stdmap2[2] = "TWO"; // map2[1] = stdmap2[1] = "ONE"; // map2[8] = stdmap2[8] = "EIGHT"; // ASSERT_EQ(map2.size(), stdmap2.size()); // map_type map3 = map + map2; // std::map stdmap3 = // graphlab::map_union(stdmap, stdmap2); // std::cout << map3 << std::endl; // foreach(pair_type pair, stdmap3) { // ASSERT_EQ(map3[pair.first], pair.second); // ASSERT_EQ(map3.safe_find(pair.first), pair.second); // ASSERT_TRUE(map3.has_key(pair.first)); // } // foreach(pair_type pair, map3) { // ASSERT_EQ(stdmap3[pair.first], pair.second); // } // } }; #include ================================================ FILE: tests/small_set_test.cxx ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include using namespace graphlab; #include class test_small_set : public CxxTest::TestSuite { public: void test_union() { std::cout << std::endl; std::cout << "Testing set union" << std::endl; typedef small_set<10, int> set_type; typedef small_set<5, int> small_set_type; small_set<0, int> empty_set; small_set<10, int> set1; small_set<10, int> set2; set1 += set_type(1) + small_set_type(3) + set_type(2) + empty_set; set1 += 1; set1 += 3; set1 += 2; set1 += empty_set; set1 += 1; std::set true_set1; true_set1.insert(1); true_set1.insert(2); true_set1.insert(3); ASSERT_EQ(set_type(true_set1), set1); std::cout << "set1: " << set1 << std::endl; set2 += set_type(2) + small_set_type(5) + small_set_type(3) + set_type(7); set2.insert(0); set2 += 7; set2 += 0; std::set true_set2; true_set2.insert(0); true_set2.insert(2); true_set2.insert(5); true_set2.insert(3); true_set2.insert(7); ASSERT_EQ(set_type(true_set2), set2); std::cout << "set2: " << set2 << std::endl; small_set<7, int> set3 = set1 + set2; std::set true_set3 = set_union(true_set1, true_set2); ASSERT_EQ(set_type(true_set3), set3); std::cout << "set3 = set1 + set2: " << set3 << std::endl; std::cout << "set3 + set3: " << (set3 + set3) << std::endl; ASSERT_EQ(set_type(true_set3), (set3 + set3)); } void test_intersection() { std::cout << std::endl; std::cout << "Testing set union" << std::endl; typedef small_set<10, int> set_type; typedef small_set<5, int> small_set_type; small_set<0, int> empty_set; small_set<10, int> set1; small_set<10, int> set2; set1 += set_type(1) + small_set_type(3) + set_type(2) + empty_set; set1.insert(8); // do some intersections set1 *= set1; set1 = set1 * set1; std::set true_set1; true_set1.insert(1); true_set1.insert(2); true_set1.insert(3); true_set1.insert(8); ASSERT_EQ(set_type(true_set1), set1); std::cout << "set1: " << set1 << std::endl; set2 += 
set_type(2) + small_set_type(5) + small_set_type(3) + set_type(7); set2.insert(0); set2 += 4; std::set true_set2; true_set2.insert(0); true_set2.insert(2); true_set2.insert(5); true_set2.insert(3); true_set2.insert(7); true_set2.insert(4); ASSERT_EQ(set_type(true_set2), set2); std::cout << "set2: " << set2 << std::endl; small_set<7, int> set3 = set1 * set2; std::set true_set3 = set_intersect(true_set1, true_set2); ASSERT_EQ(set_type(true_set3), set3); std::cout << "set3 = set1 * set2: " << set3 << std::endl; std::cout << "set3 * set3: " << (set3 + set3) << std::endl; ASSERT_EQ(set_type(true_set3), (set3 + set3)); } void test_difference() { std::cout << std::endl; std::cout << "Testing set diff" << std::endl; typedef small_set<10, int> set_type; typedef small_set<5, int> small_set_type; small_set<0, int> empty_set; small_set<10, int> set1; small_set<10, int> set2; set1 += set_type(1) + small_set_type(3) + set_type(2) + empty_set; set1.insert(8); // do some intersections ASSERT_EQ(empty_set, set1 - set1); ASSERT_EQ(empty_set, empty_set - empty_set); empty_set = (empty_set - set1); ASSERT_EQ(empty_set, empty_set - set1); ASSERT_EQ(set1, set1 - empty_set); std::set true_set1; true_set1.insert(1); true_set1.insert(2); true_set1.insert(3); true_set1.insert(8); ASSERT_EQ(set_type(true_set1), set1); std::cout << "set1: " << set1 << std::endl; set2 += set_type(2) + small_set_type(5) + small_set_type(3) + set_type(7); set2.insert(0); set2 += 4; std::set true_set2; true_set2.insert(0); true_set2.insert(2); true_set2.insert(5); true_set2.insert(3); true_set2.insert(7); true_set2.insert(4); ASSERT_EQ(set_type(set_difference(true_set1, true_set2)), set1 - set2); ASSERT_EQ(set_type(set_difference(true_set2, true_set1)), set2 - set1); } void test_range_iteration() { typedef std::pair pair_type; typedef small_set<20, pair_type > set_type; set_type set = set_type(std::make_pair(1, "hello")) + set_type(std::make_pair(2, "world")); foreach(const pair_type& value, set) { std::cout << 
value.first << value.second << ", "; } std::cout << std::endl; } }; #include ================================================ FILE: tests/sort_test.cpp ================================================ #include #include #include using namespace graphlab; int main(int argc, char** argv) { mpi_tools::init(argc, argv); distributed_control dc; std::vector keys; std::vector values; for (size_t i = 0;i < 1000000; ++i) { size_t s = rand(); keys.push_back(s); values.push_back(s); } sample_sort sorter(dc); sorter.sort(keys.begin(), keys.end(), values.begin(), values.end()); std::vector > > result(dc.numprocs()); std::swap(result[dc.procid()], sorter.result()); dc.gather(result, 0); if (dc.procid() == 0) { // test that it is sorted and the values are correct size_t last = 0; for (size_t i = 0;i < result.size(); ++i) { dc.cout() << result[i].size() << ","; for (size_t j = 0; j < result[i].size(); ++j) { ASSERT_EQ(result[i][j].first, result[i][j].second); ASSERT_GE(result[i][j].first, last); last = result[i][j].first; } } dc.cout() << std::endl; } mpi_tools::finalize(); } ================================================ FILE: tests/synchronous_engine_test.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include // #include #include typedef graphlab::distributed_graph graph_type; class count_in_neighbors : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::IN_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { // if (edge.target().id() == 7) // std::cout << edge.source().id() << "\t" << edge.target().id() << std::endl; return 1; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { // if (total != int(vertex.num_in_edges())) { // std::cout << "test fail vid : " << vertex.id() << std::endl; // } ASSERT_EQ( total, int(vertex.num_in_edges()) ); context.signal(vertex); } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; // end of count neighbors void test_in_neighbors(graphlab::distributed_control& dc, graphlab::command_line_options& clopts, graph_type& graph) { std::cout << "Constructing a syncrhonous engine for in neighbors" << std::endl; typedef graphlab::synchronous_engine engine_type; engine_type engine(dc, graph, clopts); std::cout << "Scheduling all vertices to count their neighbors" << std::endl; engine.signal_all(); std::cout << "Running!" << std::endl; engine.start(); std::cout << "Finished" << std::endl; if (graph.is_dynamic()) { std::cout << "Test engine on dynamic graph !" << std::endl; graph.load_synthetic_powerlaw(10000); graph.finalize(); engine.signal_all(); std::cout << "Running!" 
<< std::endl; engine.start(); std::cout << "Finished" << std::endl; } } class count_out_neighbors : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::OUT_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return 1; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { ASSERT_EQ( total, int(vertex.num_out_edges()) ); context.signal(vertex); } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; // end of count neighbors void test_out_neighbors(graphlab::distributed_control& dc, graphlab::command_line_options& clopts, graph_type& graph) { std::cout << "Constructing a syncrhonous engine for out neighbors" << std::endl; typedef graphlab::synchronous_engine engine_type; engine_type engine(dc, graph, clopts); std::cout << "Scheduling all vertices to count their neighbors" << std::endl; engine.signal_all(); std::cout << "Running!" << std::endl; engine.start(); std::cout << "Finished" << std::endl; if (graph.is_dynamic()) { std::cout << "Test engine on dynamic graph !" << std::endl; graph.load_synthetic_powerlaw(10000); graph.finalize(); engine.signal_all(); std::cout << "Running!" 
<< std::endl; engine.start(); std::cout << "Finished" << std::endl; } } class count_all_neighbors : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return 1; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { ASSERT_EQ( total, int(vertex.num_in_edges() + vertex.num_out_edges() ) ); context.signal(vertex); } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; // end of count neighbors void test_all_neighbors(graphlab::distributed_control& dc, graphlab::command_line_options& clopts, graph_type& graph) { std::cout << "Constructing a syncrhonous engine for all neighbors" << std::endl; typedef graphlab::synchronous_engine engine_type; engine_type engine(dc, graph, clopts); std::cout << "Scheduling all vertices to count their neighbors" << std::endl; engine.signal_all(); std::cout << "Running!" << std::endl; engine.start(); std::cout << "Finished" << std::endl; if (graph.is_dynamic()) { std::cout << "Test engine on dynamic graph !" << std::endl; graph.load_synthetic_powerlaw(10000); graph.finalize(); engine.signal_all(); std::cout << "Running!" 
<< std::endl; engine.start(); std::cout << "Finished" << std::endl; } } class basic_messages : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { int message_value; public: void init(icontext_type& context, const vertex_type& vertex, const message_type& msg) { message_value = msg; } edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::IN_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return 1; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { context.signal(vertex, 0); if(message_value < 0) { // first iteration has wrong messages return; } ASSERT_EQ(total, message_value); } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::OUT_EDGES; } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { context.signal(edge.target(), 1); } }; // end of test_messages void test_messages(graphlab::distributed_control& dc, graphlab::command_line_options& clopts, graph_type& graph) { std::cout << "Testing messages" << std::endl; typedef graphlab::synchronous_engine engine_type; engine_type engine(dc, graph, clopts); std::cout << "Scheduling all vertices to test messages" << std::endl; engine.signal_all(-1); std::cout << "Running!" << std::endl; engine.start(); std::cout << "Finished" << std::endl; if (graph.is_dynamic()) { engine.init(); std::cout << "Test engine on dynamic graph !" << std::endl; graph.load_synthetic_powerlaw(10000); graph.finalize(); engine.signal_all(-1); std::cout << "Running!" 
<< std::endl; engine.start(); std::cout << "Finished" << std::endl; } } class count_aggregators : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::IN_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { ASSERT_LT(vertex.data(), 100); ASSERT_GE(vertex.data(), 0); return vertex.data(); } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { ASSERT_EQ( total, context.iteration() * vertex.num_in_edges() ); vertex.data() = context.iteration() + 1; if(context.iteration() < 10) context.signal(vertex); } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; // end of count aggregators int iteration_counter(count_aggregators::icontext_type& context, const graph_type::vertex_type& vertex) { ASSERT_LT(vertex.data(), 100); return vertex.data(); } int finalize_iter = 0; void iteration_finalize(count_aggregators::icontext_type& context, const int& total) { std::cout << "Finalized" << std::endl; ASSERT_EQ(total, context.num_vertices() * (context.iteration()+1)); ASSERT_EQ(finalize_iter++, context.iteration()); } void test_count_aggregators(graphlab::distributed_control& dc, graphlab::command_line_options& clopts, graph_type& graph) { std::cout << "Constructing a syncrhonous engine for aggregators" << std::endl; typedef graphlab::synchronous_engine engine_type; engine_type engine(dc, graph, clopts); engine.add_vertex_aggregator("iteration_counter", iteration_counter, iteration_finalize); engine.aggregate_periodic("iteration_counter", 0); std::cout << "Scheduling all vertices to count their neighbors" << std::endl; engine.signal_all(); std::cout << "Running!" << std::endl; engine.start(); std::cout << "Finished" << std::endl; ASSERT_EQ(finalize_iter, engine.iteration()); } int main(int argc, char** argv) { ///! 
Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::dc_init_param rpc_parameters; graphlab::init_param_from_mpi(rpc_parameters); graphlab::distributed_control dc(rpc_parameters); graphlab::command_line_options clopts("Test code."); clopts.engine_args.set_option("max_iterations", 10); std::cout << "Creating a powerlaw graph" << std::endl; graph_type graph(dc, clopts); graph.load_synthetic_powerlaw(10000); graph.finalize(); test_in_neighbors(dc, clopts, graph); test_out_neighbors(dc, clopts, graph); test_all_neighbors(dc, clopts, graph); test_messages(dc, clopts, graph); test_count_aggregators(dc, clopts, graph); graphlab::mpi_tools::finalize(); } // end of main ================================================ FILE: tests/test_lock_free_pool.cxx ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include using namespace graphlab; lock_free_pool pool; void exec() { size_t *s = NULL; for (size_t i = 0;i < 1000000; ++i) { while(1) { s = pool.alloc(); if (s == NULL) continue; else { for (size_t j = 0;j < 10; ++j) (*s)++; pool.free(s); break; } } } } class LockFreePoolTestSuite: public CxxTest::TestSuite { public: void test_lock_free_pool() { size_t nthreads = 8; pool.reset_pool(32); thread_group g; for (size_t i = 0; i < nthreads; ++i) { g.launch(exec); } while(1) { try { g.join(); break; } catch(const char* c ) { std::cout << c << "\n"; } } std::vector alldata = pool.unsafe_get_pool_ref(); size_t total = 0; for (size_t i = 0;i < alldata.size(); ++i) { total += alldata[i]; } TS_ASSERT_EQUALS(total, 10000000 * nthreads); } }; ================================================ FILE: tests/test_parsers.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include typedef graphlab::distributed_graph graph_type; void check_structure(graph_type &graph) { ASSERT_EQ(graph.num_vertices(), 5); ASSERT_EQ(graph.num_edges(), 7); // check vertex 0 { graph_type::vertex_type vtype = graph.vertex(0); graph_type::local_edge_list_type v0_out = graph_type::local_vertex_type(vtype).out_edges(); ASSERT_EQ(v0_out.size(), 1); ASSERT_EQ(v0_out[0].target().global_id(), 5); } // vertex 1 { graph_type::vertex_type vtype = graph.vertex(1); graph_type::local_edge_list_type v0_out = graph_type::local_vertex_type(vtype).out_edges(); ASSERT_EQ(v0_out.size(), 2); ASSERT_EQ(v0_out[0].target().global_id(), 0); ASSERT_EQ(v0_out[1].target().global_id(), 5); } // vertex 2 { graph_type::vertex_type vtype = graph.vertex(2); graph_type::local_edge_list_type v0_out = graph_type::local_vertex_type(vtype).out_edges(); ASSERT_EQ(v0_out.size(), 2); ASSERT_EQ(v0_out[0].target().global_id(), 0); ASSERT_EQ(v0_out[1].target().global_id(), 5); } // vertex 3 { graph_type::vertex_type vtype = graph.vertex(3); graph_type::local_edge_list_type v0_out = graph_type::local_vertex_type(vtype).out_edges(); ASSERT_EQ(v0_out.size(), 2); ASSERT_EQ(v0_out[0].target().global_id(), 0); ASSERT_EQ(v0_out[1].target().global_id(), 5); } } void test_adj(graphlab::distributed_control& dc) { graphlab::distributed_graph graph(dc); graph.load_format("data/test_adj", "adj"); graph.finalize(); check_structure(graph); } void test_snap(graphlab::distributed_control& dc) { graphlab::distributed_graph graph(dc); graph.load_format("data/test_snap", "snap"); graph.finalize(); check_structure(graph); } void test_tsv(graphlab::distributed_control& dc) { graphlab::distributed_graph graph(dc); graph.load_format("data/test_tsv", "tsv"); graph.finalize(); check_structure(graph); } void test_powerlaw(graphlab::distributed_control& dc) { graphlab::distributed_graph graph(dc); graph.load_synthetic_powerlaw(1000); 
graph.finalize(); ASSERT_EQ(graph.num_vertices(), 1000); std::cout << graph.num_edges() << " Edges\n"; } void test_save_load(graphlab::distributed_control& dc) { graphlab::distributed_graph graph(dc); graph.load_synthetic_powerlaw(1000); graph.finalize(); ASSERT_EQ(graph.num_vertices(), 1000); graph.save_format("data/plawtest_tsv", "tsv"); graph.save_format("data/plawtest_jrl", "graphjrl"); // load it back graphlab::distributed_graph graph2(dc); graph2.load_format("data/plawtest_tsv", "tsv"); graph2.finalize(); ASSERT_EQ(graph.num_vertices(), graph2.num_vertices()); ASSERT_EQ(graph.num_edges(), graph2.num_edges()); graphlab::distributed_graph graph3(dc); graph3.load_format("data/plawtest_jrl", "graphjrl"); graph3.finalize(); ASSERT_EQ(graph.num_vertices(), graph3.num_vertices()); ASSERT_EQ(graph.num_edges(), graph3.num_edges()); } int main(int argc, char** argv) { graphlab::distributed_control dc; test_adj(dc); test_snap(dc); test_tsv(dc); test_powerlaw(dc); test_save_load(dc); }; ================================================ FILE: tests/test_vertex_set.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include // #include #include typedef graphlab::distributed_graph graph_type; bool select_out_degree_le(graph_type::vertex_type vtx, size_t ndeg) { return vtx.num_out_edges() <= ndeg; } bool select_out_degree_eq(graph_type::vertex_type vtx, size_t ndeg) { return vtx.num_out_edges() == ndeg; } bool select_vid_modulo(graph_type::vertex_type vtx, size_t divisor) { return (vtx.id() % divisor) == 0; } size_t is_divisible(graph_type::vertex_type vtx, size_t divisor) { return (vtx.id() % divisor) == 0; } size_t count_edges(graph_type::edge_type e) { return 1; } void set_to_one(graph_type::vertex_type vtx) { vtx.data() = 1; } int vertex_data_identity(graph_type::vertex_type vtx) { return vtx.data(); } int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; graph_type graph(dc); graph.load_synthetic_powerlaw(100000); graph.finalize(); dc.cout() << graph.vertex_set_size(graph.complete_set()) << " Vertices\n"; ASSERT_EQ(graph.vertex_set_size(graph.complete_set()), graph.num_vertices()); // select all vertices which have <= 1 neighbors graphlab::vertex_set small = graph.select(boost::bind(select_out_degree_le, _1, 1)); dc.cout() << graph.vertex_set_size(small) << " vertices with out degree <= 1\n"; // all vertices which have > 1 neighbors graphlab::vertex_set connected = graph.complete_set() - small; dc.cout() << graph.vertex_set_size(connected) << " vertices with out degree > 1\n"; // union of the two of them should give me all vertices graphlab::vertex_set all = small | connected; ASSERT_EQ(graph.vertex_set_size(all), graph.num_vertices()); // select all vertices with an even ID graphlab::vertex_set even_id = graph.select(boost::bind(select_vid_modulo, _1, 2)); // select all vertices with ID divisible by 3 graphlab::vertex_set div_3_id = 
graph.select(boost::bind(select_vid_modulo, _1, 3)); // intersect graphlab::vertex_set div_6_id = even_id & div_3_id; // count the number of IDs which are divisible by 6 size_t num_div_6 = graph.map_reduce_vertices(boost::bind(is_divisible, _1, 6)); ASSERT_EQ(num_div_6, 1 + (graph.num_vertices() - 1) / 6); // do a restricted map_reduce size_t num_div_6_restricted = graph.map_reduce_vertices (boost::bind(is_divisible, _1, 6), div_6_id); ASSERT_EQ(num_div_6, num_div_6_restricted); ASSERT_EQ(graph.vertex_set_size(div_6_id), num_div_6); graphlab::vertex_set out_deg_one = graph.select(boost::bind(select_out_degree_eq, _1, 1)); // test edge mapreduce size_t num_small_edges = graph.map_reduce_edges( count_edges, out_deg_one, graphlab::OUT_EDGES); // since the set only has stuff with out degree == 1... the number // of edges must match the size of out_deg_one ASSERT_EQ(num_small_edges, graph.vertex_set_size(out_deg_one)); // test transform // set vdata to 1 for the vertices with out degree 1 graph.transform_vertices(set_to_one, out_deg_one); size_t total = graph.map_reduce_vertices(vertex_data_identity, out_deg_one); ASSERT_EQ(total, graph.vertex_set_size(out_deg_one)); // test neighborhood selection // extract the set of out neighbors of out_deg_one graphlab::vertex_set out_nbrs = graph.neighbors(out_deg_one, graphlab::OUT_EDGES); dc.cout() << graph.vertex_set_size(out_nbrs) << " nbr size\n"; // extract the set of in neighbors of these out neighbors graphlab::vertex_set out_nbrs_in_nbrs = graph.neighbors(out_nbrs, graphlab::IN_EDGES); dc.cout() << graph.vertex_set_size(out_nbrs_in_nbrs) << " nbr nbr size\n"; // this set must contain the original out_deg_one set ASSERT_TRUE(graph.vertex_set_empty((out_deg_one & out_nbrs_in_nbrs) - out_deg_one)); graphlab::mpi_tools::finalize(); } ================================================ FILE: tests/thread_tools.cxx ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. 
* All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include using namespace graphlab; atomic testval; void test_inc() { usleep(100000); testval.inc(); } void test_dec() { usleep(100000); testval.dec(); } void thread_assert_false() { ASSERT_TRUE(false); } void test_group_exception_forwarding(){ std::cout << "\n"; std::cout << "----------------------------------------------------------------\n"; std::cout << "This test will print a large number of assertional failures\n"; std::cout << "and back traces. This is intentional as we are testing the\n" ; std::cout << "exception forwarding scheme\n"; std::cout << "----------------------------------------------------------------\n"; std::cout << std::endl; thread_group group; thread thr3; thr3.launch(thread_assert_false); try { thr3.join(); } catch(const char* c) { std::cout << "Exception " << c << " forwarded successfully!" << std::endl; } for (size_t i = 0;i < 10; ++i) { group.launch(thread_assert_false); } size_t numcaught = 0; while (group.running_threads() > 0) { try { group.join(); } catch (const char* c){ std::cout << "Exception " << c << " forwarded successfully!" << std::endl; numcaught++; } } std::cout << "Caught " << numcaught << " exceptions!" 
<< std::endl; TS_ASSERT_EQUALS(numcaught, (size_t)10); } void test_pool(){ testval.value = 0; thread_pool pool(4); for (size_t j = 0;j < 10; ++j) { for (size_t i = 0;i < 10; ++i) { pool.launch(test_inc); } for (size_t i = 0;i < 10; ++i) { pool.launch(test_dec); } pool.set_cpu_affinity(j % 2); } pool.join(); TS_ASSERT_EQUALS(testval.value, 0); } void test_pool_exception_forwarding(){ std::cout << "\n"; std::cout << "----------------------------------------------------------------\n"; std::cout << "This test will print a large number of assertional failures\n"; std::cout << "and back traces. This is intentional as we are testing the\n" ; std::cout << "exception forwarding scheme\n"; std::cout << "----------------------------------------------------------------\n"; std::cout << std::endl; thread_pool pool(10); thread thr3; thr3.launch(thread_assert_false); try { thr3.join(); } catch(const char* c) { std::cout << "Exception " << c << " forwarded successfully!" << std::endl; } for (size_t i = 0;i < 10; ++i) { pool.launch(thread_assert_false); if (i == 50) { pool.set_cpu_affinity(true); } } size_t numcaught = 0; while (1) { try { pool.join(); break; } catch (const char* c){ std::cout << "Exception " << c << " forwarded successfully!" << std::endl; numcaught++; } } std::cout << "Caught " << numcaught << " exceptions!" << std::endl; TS_ASSERT_EQUALS(numcaught, (size_t)10); } class ThreadToolsTestSuite : public CxxTest::TestSuite { public: void test_thread_group_exception(void) { test_group_exception_forwarding(); } void test_thread_pool(void) { test_pool(); } void test_thread_pool_exception(void) { test_pool_exception_forwarding(); } }; ================================================ FILE: tests/union_find_test.cxx ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include graphlab::concurrent_union_find uf2; void add_even() { for (size_t i = 2; i < 1000000; i+=2) { size_t unionwith = 0; while(1){ unionwith = graphlab::random::fast_uniform((size_t)0, i - 1); if (unionwith % 2 == 0) break; } uf2.merge(i, unionwith); } } void add_odd() { for (size_t i = 3; i < 1000000; i+=2) { size_t unionwith = 0; while(1){ unionwith = graphlab::random::fast_uniform((size_t)0, i - 1); if (unionwith % 2 == 1) break; } uf2.merge(i, unionwith); } } class UnionFindTest: public CxxTest::TestSuite { public: void test_union_find() { graphlab::union_find uf; uf.init(1000); // union all the odd together and all the even together for (size_t i = 2; i < 1000; i+=2) { size_t unionwith = 0; while(1){ unionwith = graphlab::random::fast_uniform((size_t)0, i - 1); if (unionwith % 2 == 0) break; } uf.merge(i, unionwith); } // union all the odd together and all the even together for (size_t i = 3; i < 1000; i+=2) { size_t unionwith = 0; while(1){ unionwith = graphlab::random::fast_uniform((size_t)0, i - 1); if (unionwith % 2 == 1) break; } uf.merge(i, unionwith); } // assert that all evens are together and all odds are together size_t evenid = uf.find(0); for (size_t i = 0; i < 1000; i+=2) { TS_ASSERT_EQUALS(uf.find(i), evenid); } size_t oddid = uf.find(1); for (size_t i = 1; i < 1000; i+=2) { TS_ASSERT_EQUALS(uf.find(i), oddid); } } void test_union_find2() { uf2.init(1000000); graphlab::thread_group tg; tg.launch(add_even); tg.launch(add_even); 
tg.launch(add_even); tg.launch(add_odd); tg.launch(add_odd); tg.launch(add_odd); tg.join(); // assert that all evens are together and all odds are together size_t evenid = uf2.find(0); for (size_t i = 0; i < 1000000; i+=2) { TS_ASSERT_EQUALS(uf2.find(i), evenid); } size_t oddid = uf2.find(1); for (size_t i = 1; i < 1000000; i+=2) { TS_ASSERT_EQUALS(uf2.find(i), oddid); } } }; ================================================ FILE: toolkits/CMakeLists.txt ================================================ project(GraphLab) # link_libraries(${Boost_LIBRARIES}) # link_libraries(${GraphLab_LIBRARIES}) macro(add_all_subdirectories retval curdir) file(GLOB sub-dir RELATIVE ${curdir} *) set(list_of_dirs "") foreach(dir ${sub-dir}) if(IS_DIRECTORY ${curdir}/${dir}) STRING(SUBSTRING ${dir} 0 1 firstchar) if(${firstchar} STREQUAL "." OR ${firstchar} STREQUAL "_" ) else(${firstchar} STREQUAL "." OR ${firstchar} STREQUAL "_") set(list_of_dirs ${list_of_dirs} ${dir}) message(STATUS "Detected Toolkit: " ${dir}) add_subdirectory(${dir}) endif() endif() endforeach() set(${retval} ${list_of_dirs}) endmacro() add_all_subdirectories(retval, ${CMAKE_CURRENT_SOURCE_DIR}) ================================================ FILE: toolkits/README.md ================================================ # Toolkits Overview ## Collaborative Filtering ![Collaborative Filtering](../images/collaborative_filtering.jpg?raw=true "collaborative_filtering") The collaborative filtering toolkit provides tools to identify patterns of user interests and make targeted recommendations. Learn more about collaborative filtering [here.](http://en.wikipedia.org/wiki/Collaborative_Filter) ![ALS equation](../images/als_eqn.gif "als_eqn") Most of the algorithms take the rating matrix R, which is a sparse matrix holding the rating given by users to movies, and builds a linear model, finding two low dimensional matrices U and V s.t. their product approximates R: R ~ UV. 
We implement multiple collaborative filtering algorithms: ALS (Alternating least squares), SGD (Stochastic Gradient Descent), Bias SGD, Weighted-ALS, Sparse-ALS, SVD++.

## Clustering

![](../images/kmeans1.gif "kmeans")

## KMeans++

GraphLab PowerGraph has a fast scalable implementation of the [Kmeans++](http://en.wikipedia.org/wiki/K-means%2B%2B) algorithm: a robust method of grouping datapoints into clusters.

## Computer Vision

![Image Stitching](../images/panorama-1024x251.png "Image Stitching")

The Computer Vision Toolkit aims to provide fully distributed wrappers to algorithms in [OpenCV](http://opencv.org "OpenCV"), an open-source library aimed at real-time computer vision. Currently, the only implemented algorithm is Image-Stitching, where the goal is to create a composite panoramic image from a collection of images. Learn more about computer vision [here.](http://en.wikipedia.org/wiki/Computer_vision)

GraphLab PowerGraph Computer Vision Toolkit has become its own spin-off project called [CloudCV](http://cloudcv.org "CloudCV"), a comprehensive system that aims to provide access to state-of-the-art computer vision algorithms on the cloud.

![CloudCV](../images/cloudcv-1024x489.png "CloudCV")

CloudCV: Large-Scale Parallel Computer Vision on the Cloud

## Graphical Models

![](../doc/images/noisy_img.jpeg) ![](../images/Slide1.jpg "Slide1") ![](../doc/images/pred_img.jpeg)

[Graphical models](http://en.wikipedia.org/wiki/Graphical_model) provide a compact interpretable representation of complex statistical phenomena by encoding random variables as vertices in a graph and relationships between those variables as edges. The Graphical Models toolkit provides a collection of methods to make predictions under uncertainty, and for reasoning about structured noisy data. The main components of the Graphical Models toolkit are:

1.
Distributed Dual Decomposition: performs maximum _a posteriori_ (MAP) inference in general [Markov Random Fields](http://en.wikipedia.org/wiki/Markov_random_field) via the Dual Decomposition algorithm. The MRF is assumed to be provided in the standard [UAI file format](http://www.cs.huji.ac.il/project/PASCAL/fileFormat.php). Maintained by [Dhruv Batra](http://filebox.ece.vt.edu/~dbatra/).
2. [Structured Prediction](http://docs.graphlab.org/graphical_models.html): applies the [Loopy Belief propagation (LBP)](http://en.wikipedia.org/wiki/Belief_propagation) algorithm to a pair-wise [Markov Random Field](http://en.wikipedia.org/wiki/Markov_random_field) encoding the classic [Potts Model](http://en.wikipedia.org/wiki/Potts_model).

## Graph Analytics

![](../images/r-300x278.jpg "r")

The Graph Analytics Toolkit aims to provide high performance, distributed tools for graph mining, for use in community detection, social network discovery, etc. The toolkit currently implements the following tools:

### Triangle Counting

![](../images/triangle_weak_community-150x150.jpg "triangle_weak_community") ![](../images/triangle_strong_community-150x150.jpg "triangle_strong_community")

Two triangle counting programs:

1. **Undirected Triangle Counting**: counts the total number of triangles in a graph, or the number of triangles each vertex is in
2. **Directed Triangle Counting:** Counts the number of types of triangles each vertex is in

### PageRank

![](../images/800px-PageRank-hi-res-300x215.png "800px-PageRank-hi-res")

A classical graph algorithm which assigns each vertex a numerical importance value based on random walk properties. Learn more about page rank [here.](http://en.wikipedia.org/wiki/PageRank)

### KCore Decomposition

![](../images/kcore2-300x300.jpg "kcore")

Identifies a hierarchical ordering of the vertices in the graph, allowing discovery of the central components of the network.
## Topic Modeling

![](../images/topic-300x179.gif "topic")

## Latent Dirichlet Allocation

The topic modelling toolbox currently implements the [Latent Dirichlet Allocation](http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation) algorithm for deriving semantic topic information from a corpus of plain text.

## Linear Solvers

GraphLab PowerGraph iterative solvers, for solving a linear system of the type Ax = b. Currently the [Jacobi method](http://en.wikipedia.org/wiki/Jacobi_method) is implemented.

================================================
FILE: toolkits/clustering/CMakeLists.txt
================================================
project(GraphProcessing)
add_graphlab_executable(kmeans kmeans.cpp)
add_graphlab_executable(generate_synthetic generate_synthetic.cpp)
add_graphlab_executable(spectral_clustering spectral_clustering.cpp)
add_graphlab_executable(graph_laplacian_for_sc graph_laplacian_for_sc.cpp)

================================================
FILE: toolkits/clustering/clustering.dox
================================================
/**
\page clustering Clustering

This toolkit will contain implementations of clustering algorithm.
Currently the algorithms implemented are
\li \ref clustering_kmeans "KMeans++"
\li \ref clustering_spectral_clustering "Spectral Clustering"

\section clustering_kmeans KMeans++

The \c kmeans program implements the KMeans++ algorithm described by
Arthur, D. and Vassilvitskii, S. (2007). "k-means++: the advantages of careful seeding".
Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms. pp. 1027–1035.

It takes as input a collection of files where each line in each file represents
a data point. Each line must contains a list of numbers, white-space or comma
separated. Each line must be the same length. For instance in this example
input file, there are 6 datapoints, one per line, and each datapoint is a
point in a 5-dimensional space.
\verbatim -10.7551 6.82178 5.33455 -2.08247 2.86694 -1.36687 10.8464 -5.28851 -4.26768 -5.50659 -8.79834 8.01002 5.33418 0.102824 3.23318 -8.64345 6.81946 1.2309 -4.46784 2.26341 -8.29782 7.1154 3.32559 -2.59422 2.33936 -8.12504 8.98924 4.15027 0.253153 1.75911 \endverbatim \subsection clustering_kmeans_synthetic Synthetic Data Example synthetic data can be generated by running \verbatim > ./generate_synthetic [Number of Clusters] [Number of Dimensions] [Number of datapoints] \endverbatim This will generate a file called \c synthetic.txt , and will also output to screen the cluster centers. For instance: \verbatim > ./generate_synthetic 2 3 10 Usage: generate_synthetic [NClusters] [Dimensions] [Ndata] Center 0 at: -6.69675 0.355189 -4.88601 Center 1 at: 5.85919 0.0388327 5.50007 > cat synthetic.txt -4.31568 -0.396959 -6.29507 -4.56112 -1.74917 -4.57874 4.54508 0.102845 6.35385 4.87746 -0.832591 7.06942 -5.91254 -0.278006 -4.25934 6.95139 0.120139 4.89531 -6.28538 -0.88527 -4.74988 -6.84791 0.887664 -4.91919 7.47117 1.67911 6.02221 -4.78011 1.2099 -4.55519 \endverbatim \note Mathematically, the synthetic generator draws centers from the [-10,10]^D uniform hypercube and draws data points by sampling uniformly from the centers, and around a unit gaussian around each center. \subsection clustering_kmeans_running Running KMeans To run Kmeans clustering: \verbatim > ./kmeans --data=[data prefix] --cluster=[N cluster] --output-clusters=[cluster output file] \endverbatim All files beginning with the data prefix will be loaded. The data may be on HDFS. The \c --cluster option is the number of clusters to solve for. The cluster output file must be a target accessible on the local file system. (In the distributed case, it must be accessible from the 0th machine). This file will contain a list of all the solved cluster centers. 
For instance, running on the synthetic data above:
\verbatim
>./kmeans --data=synthetic.txt --clusters=2 --output-clusters=cluster.txt
Number of datapoints: 10
Validating data...Initializing using Kmeans++
Running Kmeans...
Kmeans iteration 1: # points with changed assignments = 0
Writing Cluster Centers...
>cat cluster.txt
-5.45046 -0.201973 -4.8929
5.96127 0.267376 6.0852
\endverbatim

To also output the cluster center assignments for each datapoint, add the option:
\verbatim
--output-data=[output prefix]
\endverbatim
The output prefix is where the output data will be written. This may be
located on HDFS. For instance, if the output_prefix is "v_out", the output
files will be written to:
\verbatim
v_out_1_of_16
v_out_2_of_16
...
v_out_16_of_16
\endverbatim
Each line in the output files contains, firstly, the original data point,
followed by the cluster number it was assigned to. These need not be in the
same order as the original input. For instance, running kmeans on the example
synthetic data above may produce the following output:
\verbatim
>./kmeans --data=synthetic.txt --clusters=2 --output-clusters=cluster.txt --output-data=data.txt
Number of datapoints: 10
Validating data...Initializing using Kmeans++
Running Kmeans...
Kmeans iteration 1: # points with changed assignments = 0
Writing Cluster Centers...
Writing Data with cluster assignments...
>cat data.txt
-4.78011 1.2099 -4.55519 0
7.47117 1.67911 6.02221 1
-6.84791 0.887664 -4.91919 0
-6.28538 -0.88527 -4.74988 0
6.95139 0.120139 4.89531 1
-5.91254 -0.278006 -4.25934 0
4.87746 -0.832591 7.06942 1
4.54508 0.102845 6.35385 1
-4.56112 -1.74917 -4.57874 0
-4.31568 -0.396959 -6.29507 0
\endverbatim

This program can also run distributed by using
\verbatim
> mpiexec -n [N machines] --hostfile [host file] ./kmeans ....
\endverbatim
See your MPI documentation for details on how to launch this job.
All machines must have access to the input graph location and the output
graph location. Graphs may be on HDFS.
If you have problems loading HDFS files, see the \ref FAQ. \subsection clustering_kmeans_sparse --sparse Option If --sparse=1 is set (default is 0), the program will use a sparse vector representation. The file format is [feature id]:[value] [feature id]:[value] ... Each line corresponds to a datapoint. [feature id] must be positive integer or zero. For instance: \verbatim > cat synthetic_sparse.txt 0:-4.31568 3:-0.396959 5:-6.29507 5:-4.56112 9:-1.74917 4:4.54508 5:0.102845 10:6.35385 0:4.87746 7:-0.832591 \endverbatim \subsection clustering_kmeans_id --id Option If --id=1 is set (default is 0), the program will use id for each data point. The id of a data point must be written at the head of each line of the input data. For instance: \verbatim > cat synthetic_with_id.txt 1 -4.31568 -0.396959 -6.29507 2 -4.56112 -1.74917 -4.57874 3 4.54508 0.102845 6.35385 4 4.87746 -0.832591 7.06942 \endverbatim The output data will consist of two columns: one for the ids and the other for the assigned clusters. For instance: \verbatim > cat data_with_id.txt 1 0 2 0 3 1 4 1 \endverbatim --id can be used with --sparse. \subsection clustering_kmeans_edge_data Adding Pairwise Reward This program can consider pairwise rewards (and penalty) by using --pairwise-reward=[file prefix]. All files beginning with [file prefix] will be loaded for pairwise rewards. Each line of the pairwise reward file holds [id1] [id2] [weight]. This option must be used with --id=1. For instance: \verbatim > cat pairwise_data.txt 1 2 10 2 4 -10 \endverbatim In this example the evaluation function will gain 10 when data 1 and data 2 are in the same cluster; it will gain nothing otherwise. \subsection clustering_kmeans_options Options \li \b --data (Required). The prefix from which to load the input data \li \b --clusters (Required). The number of cluster centers \li \b --ncpus (Optional. Default 2) The number of processors that will be used for computation. 
Due to some implementation limitations within GraphLab, this parameter is not respected. It will use all processors on your machine if ran in Linux, and will use only 1 processor if ran on Mac \li \b --output-data (Optional) A target prefix to write the output data with cluster assignments. May be on HDFS. \li \b --output-clusters (Optional) A target location to write the cluster centers. Must be on the local file system. \li \b --sparse (Optional. Default 0) If set at 1, will use sparse vector representation \li \b --id (Optional. Default 0) If set at 1, will use ids for data points \li \b --pairwise-reward (Optional) If set, will consider pairwise rewards written in the files beginning with the given argument \li \b --max-iteration (Optional) The max number of iterations \section clustering_spectral_clustering Spectral Clustering This program takes as input a collection of files where each line in each file represents a data point. Each line must contains an id and a list of numbers, white-space or comma separated. Each line must be the same length. For instance in this example input file, there are 6 datapoints, one per line, and each datapoint is a point in a 5-dimensional space. \verbatim 1 -10.7551 6.82178 5.33455 -2.08247 2.86694 2 -1.36687 10.8464 -5.28851 -4.26768 -5.50659 3 -8.79834 8.01002 5.33418 0.102824 3.23318 4 -8.64345 6.81946 1.2309 -4.46784 2.26341 5 -8.29782 7.1154 3.32559 -2.59422 2.33936 6 -8.12504 8.98924 4.15027 0.253153 1.75911 \endverbatim The ids of data points must start from 1 and must not skip any numbers. To run spectral clustering, the minimal set of options required are: \verbatim > ./spectral_clustering --data=[data prefix] --clusters=[N cluster] \endverbatim This program uses svd in Graphlab Collaborative Filtering Toolkit, kmeans in Graphlab Clustering Toolkit and eigen_vector_normalization in Graphlab Graph Analytics Toolkit. 
The paths to the directories are specified by --svd-dir, --kmeans-dir and --graph-analytics-dir, respectively. The program will create some intermediate files. The final clustering result is written in files named [data prefix].result with a suffix, for example [data prefix].result_1_of_4. The clustering result data will consist of two columns: one for the ids and the other for the assigned clusters. For instance: \verbatim 1 1 2 0 3 1 4 1 5 1 \endverbatim NOTE: To run the spectral clustering in a distributed setting, you must use the "mpi-args" option, not like other graphlab toolkits. The spectral clustering calls other graphlab programs. When "--mpi-args" is set, these graphlab programs are called with "mpiexec" and the string written after the "mpi-args" option. For example, if you set --mpi-args="-n 4 --hostfile host", the program calls the other graphlab programs with "mpiexec -n 4 --hostfile host". \subsection Options Relevant options are: \li \b --data (Required). The prefix from which to load the input data \li \b --clusters (Required). The number of clusters \li \b --sigma (Optional. Default 0.1). Scale parameter for Gaussian kernel. This value is often critical to the clustering result. \li \b --t-nearest (Optional. Default 20). Number of nearest neighbors (=t). Will use only the t-nearest similarities for each datapoint. If set at 0, will use all similarities. \li \b --similarity-thres (Optional). Threshold to discard small similarities. If a value is set, similarities less than this value will be discarded. \li \b --svd-dir (Optional Default ../collaborative_filtering/). Path to the directory where Graphlab svd is located \li \b --kmeans-dir (Optional. Default ../clustering/). Path to the directory where Graphlab kmeans is located \li \b --graph-analytics-dir (Optional. Default ../graph_analytics/). Path to the directory where Graphlab eigen_vector_normalization is located \li \b --ncpus (Optional. Default 2). 
The number of processors that will be used for computation. Due to some implementation limitations within GraphLab, this parameter is not respected. It will use all processors on your machine if run on Linux, and will use only 1 processor if run on Mac \li \b --graph_opts (Optional, Default empty). Any additional graph options. See graphlab::distributed_graph for a list of options. \li \b --mpi-args (Optional, Default empty). If set, will execute mpiexec with the given string. */

================================================
FILE: toolkits/clustering/generate_synthetic.cpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
* * */ #include #include int main(int argc, char** argv) { std::ofstream fout("synthetic.txt"); size_t num_clusters = 2; size_t dim = 2; size_t ndata = 10000; if (argc >= 2) num_clusters = atoi(argv[1]); if (argc >= 3) dim = atoi(argv[2]); if (argc >= 4) ndata = atoi(argv[3]); std::cout << "Usage: generate_synthetic [NClusters] [Dimensions] [Ndata]\n"; std::vector< std::vector > centers(num_clusters); for (size_t i = 0;i < centers.size(); ++i) { std::cout << "Center " << i << " at: " ; for (size_t j = 0; j < dim; ++j) { double r = graphlab::random::fast_uniform(-10,10); std::cout << r << "\t"; centers[i].push_back(r); } std::cout << "\n"; } // now generate data points // for (size_t i = 0;i < ndata; ++i) { size_t c = graphlab::random::fast_uniform(0, centers.size() - 1); for (size_t j = 0; j < dim; ++j) { double d = graphlab::random::gaussian() + centers[c][j]; fout << d << "\t"; } fout << "\n"; } } ================================================ FILE: toolkits/clustering/graph_laplacian_for_sc.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
 * *
 */

// ---------------------------------------------------------------------------
// Builds the normalized graph Laplacian used by the spectral clustering
// driver: loads datapoints, computes Gaussian-kernel similarities, optionally
// keeps only the t nearest neighbours and/or drops small similarities,
// normalizes by D^-1/2 on both sides, and writes the result.
//
// NOTE(review): this copy of the file was damaged in extraction -- every span
// between a '<' and the following '>' (template arguments, #include targets,
// and in a few places whole statements) was stripped.  Such spots are marked
// "[extraction gap]" below; recover the exact text from the upstream
// repository before compiling.  Code tokens are otherwise kept verbatim.
// ---------------------------------------------------------------------------

// [extraction gap] all fourteen #include targets were stripped
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

// shared parameters (set from the command line in main)
float gaussian_kernel_scale_parameter = 0.1;          // sigma; rescaled to 2*sigma^2 in main
float threshold_to_discard_small_similarities = 0.0;  // --similarity-thres
size_t number_of_nearest_neighbors= 20;               // --t-nearest (0 = keep all)

// data point: feature vector x plus the D^-1/2 normalization term
struct vertex_data {
  std::vector x;   // [extraction gap] element type stripped
  float D_ii;      // 1/sqrt(row sum of similarities), filled in by calc_degrees
  vertex_data():x(), D_ii(0.0) {};
  explicit vertex_data(const std::vector& x_in) : x(x_in), D_ii(0.0) {}
  // [extraction gap] the save()/load() pair below lost text between '<' and
  // '>' (including, apparently, the load() signature itself)
  void save(graphlab::oarchive& oarc) const { oarc << x.size(); for(size_t i=0;i> size; for(size_t i=0;i> temp; x.push_back(temp); } iarc >> D_ii; }
};

// similarity edge: kernel value A_ij plus a t-nearest-neighbour flag
struct edge_data{
  float A_ij;     // Gaussian-kernel similarity between the two endpoints
  bool nearest;   // true if this edge survives the t-nearest filter
  edge_data() : A_ij(0.0), nearest(false){}
  void save(graphlab::oarchive& oarc) const { oarc << A_ij << nearest; }
  void load(graphlab::iarchive& iarc) { iarc >> A_ij >> nearest; }
};

// [extraction gap] template arguments (vertex_data, edge_data) stripped
typedef graphlab::distributed_graph graph_type;

// parses one input line: [vertex_id] [element1] [element2] [element3] ...
// (comma- or whitespace-separated) and adds the vertex; the loop that,
// presumably, added edges to the other vertices was lost in extraction.
bool line_parser(graph_type& graph,
                 const std::string& filename,
                 const std::string& line) {
  if (line.empty()) return true;
  size_t id = 0;
  namespace qi = boost::spirit::qi;
  namespace ascii = boost::spirit::ascii;
  namespace phoenix = boost::phoenix;
  vertex_data vtx;
  const bool success = qi::phrase_parse
    (line.begin(), line.end(),
     // Begin grammar
     (
      qi::ulong_[phoenix::ref(id) = qi::_1] >> -qi::char_(",") >>
      (qi::double_[phoenix::push_back(phoenix::ref(vtx.x), qi::_1)] % -qi::char_(",") )
      )
     ,
     // End grammar
     ascii::space);
  if (!success) return false;
  graph.add_vertex(id, vtx);
  // [extraction gap] the loop body, the end of line_parser and the signature
  // of the similarity() helper were stripped; the surviving tokens of both
  // are kept verbatim on the next line
  for(size_t i=1;i& v1, const std::vector& v2) {
  float ret = 0.0;
  for (size_t i = 0; i < v1.size(); ++i) {
    float tmp = v1[i] - v2[i];
    ret += tmp * tmp;
  }
  // gaussian_kernel_scale_parameter holds 2*sigma^2 by the time this runs
  return exp(-ret / gaussian_kernel_scale_parameter);
}

// calculate similarities between data points (one edge at a time)
void calc_similarities(graph_type::edge_type& edata) {
  edata.data().A_ij = similarity(edata.source().data().x,
                                 edata.target().data().x);
}

// discard small similarities (optional --similarity-thres pass)
void discard_small_similarity(graph_type::edge_type& edata) {
  if(edata.data().A_ij < threshold_to_discard_small_similarities)
    edata.data().A_ij = 0.0;
}

// gather type for the t-nearest filter: a fixed-size list of the best
// (id, similarity) pairs seen so far, kept sorted by descending similarity
struct top_t_similarity{
  std::vector ids;    // [extraction gap] element type stripped
  std::vector sims;   // padded with -1.0 for "no neighbour yet"
  top_t_similarity(): ids(number_of_nearest_neighbors, std::numeric_limits::max()), sims(number_of_nearest_neighbors, -1.0){}
  top_t_similarity(size_t id, float sim): ids(number_of_nearest_neighbors, std::numeric_limits::max()), sims(number_of_nearest_neighbors, -1.0){
    ids[0] = id;
    sims[0] = sim;
  }
  // merge two sorted top-t lists, keeping only the t best entries overall
  top_t_similarity& operator+=(const top_t_similarity& other){
    std::vector new_ids;    // [extraction gap] element types stripped
    std::vector new_sims;
    size_t pos1=0;
    size_t pos2=0;
    while(pos1+pos2 < number_of_nearest_neighbors){
      if(sims[pos1] >= other.sims[pos2]){
        new_ids.push_back(ids[pos1]);
        new_sims.push_back(sims[pos1]);
        pos1++;
      }else{
        new_ids.push_back(other.ids[pos2]);
        new_sims.push_back(other.sims[pos2]);
        pos2++;
      }
    }
    ids = new_ids;
    sims = new_sims;
    return *this;
  }
  // [extraction gap] save()/load() lost text between '<' and '>'
  void save(graphlab::oarchive& oarc) const { oarc << ids.size(); for(size_t i=0;i> size; for(size_t i=0;i> id; ids.push_back(id); } for(size_t i=0;i> sim; sims.push_back(sim); } }
};

// finds each vertex's t nearest neighbours and flags those edges:
// gather merges the per-edge top-t lists, apply records the t-th best
// similarity as a threshold, and scatter marks edges at or above it
class t_nearest: public graphlab::ivertex_program, public graphlab::IS_POD_TYPE {
  // [extraction gap] ivertex_program template arguments stripped above
private:
  float threshold;   // similarity of this vertex's t-th nearest neighbour
public:
  t_nearest():threshold(0.0){}
  edge_dir_type gather_edges(icontext_type& context,
                             const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  }
  top_t_similarity gather(icontext_type& context, const vertex_type& vertex,
                          edge_type& edge) const {
    if(edge.target().id() == vertex.id()){//in edge
      return top_t_similarity(edge.source().id(), edge.data().A_ij);
    }else{//out edge
      return top_t_similarity(edge.target().id(), edge.data().A_ij);
    }
  }
  // record the t-th best similarity; edges below it will be zeroed later
  void apply(icontext_type& context, vertex_type& vertex,
             const gather_type& total) {
    threshold = total.sims[number_of_nearest_neighbors-1];
    // std::cout << vertex.id() << "\t" << total.ids[0] << "-" << total.sims[0] << ", "
    //           << total.ids[1] << "-" << total.sims[1] << std::endl;
  }
  edge_dir_type scatter_edges(icontext_type& context,
                              const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  }
  void scatter(icontext_type& context, const vertex_type& vertex,
               edge_type& edge) const {
    if(edge.data().A_ij >= threshold) edge.data().nearest = true;
  }
};

// zero out the similarities that did not survive the t-nearest filter
void make_other_similarities_zero(graph_type::edge_type& edata) {
  if(edata.data().nearest == false) edata.data().A_ij = 0.0;
}

// compute sums over rows of A and then take the inverse square root,
// i.e. the D^-1/2 term of the normalized Laplacian
class calc_degrees: public graphlab::ivertex_program, public graphlab::IS_POD_TYPE {
  // [extraction gap] ivertex_program template arguments stripped above
public:
  //gather A_ij
  edge_dir_type gather_edges(icontext_type& context,
                             const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  }
  float gather(icontext_type& context, const vertex_type& vertex,
               edge_type& edge) const {
    return edge.data().A_ij;
  }
  // D_ii <- 1/sqrt(sum_j A_ij)
  void apply(icontext_type& context, vertex_type& vertex,
             const gather_type& total) {
    vertex.data().D_ii = 1.0 / sqrt(total);
  }
  edge_dir_type scatter_edges(icontext_type& context,
                              const vertex_type& vertex) const {
    return graphlab::NO_EDGES;
  }
  void scatter(icontext_type& context, const vertex_type& vertex,
               edge_type& edge) const {
  }
};

// multiply D^-1/2 from both sides: A_ij <- D_ii^-1/2 * A_ij * D_jj^-1/2
void mult_D(graph_type::edge_type& edata) {
  edata.data().A_ij = edata.data().A_ij * edata.source().data().D_ii * edata.target().data().D_ii;
}

// map-reduce type reporting the largest similarity below 1.0 and the
// smallest similarity above 0.0 (used only for the diagnostic printout)
struct max_min_similarity{
  float max_sim;
  float min_sim;
  max_min_similarity(): max_sim(0.0), min_sim(0.0){}
  explicit max_min_similarity(float similarity): max_sim(similarity), min_sim(similarity){}
  max_min_similarity& operator+=(const max_min_similarity& other){
    if(max_sim < 1.0 && other.max_sim < 1.0){
      max_sim = std::max(max_sim, other.max_sim);
    }else if(other.max_sim < 1.0){
      max_sim = other.max_sim;
    }
    if(min_sim > 0.0 && other.min_sim > 0.0){
      min_sim = std::min(min_sim, other.min_sim);
    }else if(other.min_sim > 0.0){
      min_sim = other.min_sim;
    }
    return *this;
  }
  void save(graphlab::oarchive& oarc) const { oarc << max_sim << min_sim; }
  void load(graphlab::iarchive& iarc) { iarc >> max_sim >> min_sim; }
};

max_min_similarity absolute_edge_data(const graph_type::edge_type& edge) {
  return max_min_similarity(edge.data().A_ij);
}

// map-reduce type for the largest vertex id (= the number of datapoints,
// since the input format requires ids starting at 1 with no gaps)
struct max_vid{
  size_t vid;
  max_vid(): vid(0){}
  explicit max_vid(size_t in_vid): vid(in_vid){}
  max_vid& operator+=(const max_vid& other){
    vid = std::max(vid, other.vid);
    return *this;
  }
  void save(graphlab::oarchive& oarc) const { oarc << vid; }
  void load(graphlab::iarchive& iarc) { iarc >> vid; }
};

max_vid absolute_vertex_data(const graph_type::vertex_type& vertex) {
  return max_vid(vertex.id());
}

// writes the Laplacian: each vertex contributes a "vid vid 1.0" diagonal
// entry, each surviving edge both symmetric off-diagonal entries.
// Vertex id 0 is skipped (presumably because valid ids start at 1 --
// confirm against the input contract).
class graph_writer {
public:
  std::string save_vertex(graph_type::vertex_type v) {
    std::stringstream strm;
    size_t vid = v.id();
    if(vid == 0) return "";
    strm << vid << " " << vid << " 1.0\n";
    return strm.str();
  }
  std::string save_edge(graph_type::edge_type e) {
    const float& A_ij = e.data().A_ij;
    std::stringstream strm;
    if(A_ij > 0.0){
      strm << e.source().id() << " " << e.target().id() << " " << A_ij << "\n";
      strm << e.target().id() << " " << e.source().id() << " " << A_ij << "\n";
    }
    return strm.str();
  }
};

int main(int argc, char** argv) {
  std::cout << "construct graph Laplacian for spectral clustering.\n\n";
  //parse command line
  std::string datafile;
  graphlab::command_line_options clopts
    ("Constructing graph Laplacian for spectral clustering");
  clopts.attach_option("data", datafile,
                       "Input file. Each line hold a sample id followed by a white-space or "
                       "comma separated numeric vector. Id should start from 1");
  clopts.attach_option("sigma", gaussian_kernel_scale_parameter,
                       "Scale parameter for Gaussian kernel.");
  clopts.attach_option("similarity-thres", threshold_to_discard_small_similarities,
                       "Threshold to discard small similarities. ");
  clopts.attach_option("t-nearest", number_of_nearest_neighbors,
                       "Number of nearest neighbors (=t). Will use only the t-nearest similarities "
                       "for each datapoint. If set at 0, will use all similarities.");
  if(!clopts.parse(argc, argv)) return EXIT_FAILURE;
  if (datafile == "") {
    std::cout << "--data is not optional\n";
    return EXIT_FAILURE;
  }
  // reuse the global as 2*sigma^2, the kernel denominator (x <- 2*x^2)
  gaussian_kernel_scale_parameter *= 2.0*gaussian_kernel_scale_parameter;
  //construct graph
  graphlab::mpi_tools::init(argc, argv);
  graphlab::distributed_control dc;
  graph_type graph(dc, clopts);
  graph.load( datafile, line_parser);
  graph.finalize();
  time_t start, end;
  time(&start);
  // [extraction gap] map_reduce template arguments stripped here and below
  size_t data_num = graph.map_reduce_vertices(absolute_vertex_data).vid;
  //calculate similarities
  graph.transform_edges(calc_similarities);
  // show the max similarity less than 1 and the min similarity greater than 0
  max_min_similarity stat = graph.map_reduce_edges(absolute_edge_data);
  dc.cout() << "max squared distance(min similarity): "
            << -log(stat.min_sim)*gaussian_kernel_scale_parameter
            << "(" << stat.min_sim << ")\n"
            << "min squared distance(max similarity):"
            << -log(stat.max_sim)*gaussian_kernel_scale_parameter
            << "(" << stat.max_sim << ")\n";
  //if t is set, use only t-nearest similarities
  if(number_of_nearest_neighbors > 0){
    if(number_of_nearest_neighbors > data_num-1)
      number_of_nearest_neighbors = data_num-1;
    dc.cout() << "use only the " << number_of_nearest_neighbors
              << "-nearest similarities for each datapoint\n";
    // [extraction gap] engine template argument (t_nearest) stripped
    graphlab::omni_engine engine_nearest(dc, graph, "sync", clopts);
    engine_nearest.signal_all();
    engine_nearest.start();
    graph.transform_edges(make_other_similarities_zero);
  }
  // if a threshold is set, discard similarities less than the threshold
  if(threshold_to_discard_small_similarities > 0.0){
    dc.cout() << "discard small similarities less than "
              << threshold_to_discard_small_similarities << "\n";
    graph.transform_edges(discard_small_similarity);
  }
  //sum elements over rows (calculate the degree matrix D)
  // [extraction gap] engine template argument (calc_degrees) stripped
  graphlab::omni_engine engine(dc, graph, "sync", clopts);
  engine.signal_all();
  engine.start();
  // multiply by D^-1/2 on both sides
  graph.transform_edges(mult_D);
  time(&end);
  dc.cout() << "graph calculation time is " << (end - start) << " sec\n";
  dc.cout() << "writing...\n";
  //write results
  const std::string outputname = datafile + ".glap";
  graph.save( outputname + "_diag", graph_writer(),
              false, //set to true if each output file is to be gzipped
              true,  //whether vertices are saved
              false,1); //whether edges are saved
  graph.save( outputname + "_other", graph_writer(),
              false, //set to true if each output file is to be gzipped
              false, //whether vertices are saved
              true,1); //whether edges are saved
  //write the number of data
  const std::string datanum_filename = datafile + ".datanum";
  std::ofstream ofs(datanum_filename.c_str());
  if(!ofs) {
    std::cout << "can't create file for number of data" << std::endl;
    return EXIT_FAILURE;
  }
  ofs << data_num;
  graphlab::mpi_tools::finalize();
  return EXIT_SUCCESS;
}

================================================
FILE: toolkits/clustering/kmeans.cpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 *
 */

/**
 * This implements the classical "k-means" clustering algorithm.
 *
 * It takes as input file a series of lines where each line is a comma separated
 * or space separated list of values representing a vector. For instance:
 *
 * \verbatim
 * 1.1, 1.5, 0.9
 * 0.3, 0.4, -1.1
 * ...
 * \endverbatim
 *
 * It constructs a graph with a single vertex for each data point and simply
 * uses the "Map-Reduce" scheme to perform a k-means clustering of all
 * the datapoints.
 */

// NOTE(review): this copy of the file was damaged in extraction -- every span
// between a '<' and the following '>' (template arguments, #include targets,
// and sometimes whole statements) was stripped.  Such spots are marked
// "[extraction gap]" below; recover the exact text from the upstream
// repository before compiling.  Code tokens are otherwise kept verbatim.
// main() continues past the end of this extract and is reproduced only up to
// the cut.

// [extraction gap] all twelve #include targets were stripped
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

size_t NUM_CLUSTERS = 0;   // --clusters; number of centers to fit
bool IS_SPARSE = false;    // --sparse; use the *_sparse members instead of dense vectors

// one cluster center, in dense or sparse form depending on IS_SPARSE
struct cluster {
  cluster(): count(0), changed(false) { }
  std::vector center;       // [extraction gap] element type stripped
  std::map center_sparse;   // [extraction gap] key/value types stripped
  size_t count;             // number of points merged into this center so far
  bool changed;             // center moved in the last iteration
  void save(graphlab::oarchive& oarc) const {
    oarc << center << count << changed << center_sparse;
  }
  void load(graphlab::iarchive& iarc) {
    iarc >> center >> count >> changed >> center_sparse;
  }
};

std::vector CLUSTERS;   // [extraction gap] element type (cluster) stripped

// the current cluster to initialize
size_t KMEANS_INITIALIZATION;

// one datapoint plus its current assignment state
struct vertex_data{
  std::vector point;       // [extraction gap] element type stripped
  std::map point_sparse;   // [extraction gap] key/value types stripped
  size_t best_cluster;     // index into CLUSTERS; (size_t)(-1) = unassigned
  double best_distance;    // squared distance to the best cluster's center
  bool changed;            // assignment changed in the last iteration
  void save(graphlab::oarchive& oarc) const {
    oarc << point << best_cluster << best_distance << changed << point_sparse;
  }
  void load(graphlab::iarchive& iarc) {
    iarc >> point >> best_cluster >> best_distance >> changed >> point_sparse;
  }
};

//use edges when edge weight file is given
struct edge_data {
  double weight;
  edge_data() : weight(0.0) { }
  explicit edge_data(double w) : weight(w) { }
  void save(graphlab::oarchive& oarc) const { oarc << weight; }
  void load(graphlab::iarchive& iarc) { iarc >> weight; }
};

// helper function to compute squared distance between dense points
double sqr_distance(const std::vector& a, const std::vector& b) {
  ASSERT_EQ(a.size(), b.size());
  double total = 0;
  for (size_t i = 0;i < a.size(); ++i) {
    double d = a[i] - b[i];
    total += d * d;
  }
  return total;
}

// squared distance between sparse points; keys missing on either side
// contribute their full squared value (treated as differing from zero)
double sqr_distance(const std::map& a, const std::map& b) {
  double total = 0.0;
  for(std::map::const_iterator iter = a.begin(); iter != a.end(); ++iter){
    size_t id = (*iter).first;
    double val = (*iter).second;
    if(b.find(id) != b.end()){
      double d = val - b.at(id);
      total += d*d;
    }else{
      total += val * val;
    }
  }
  for(std::map::const_iterator iter = b.begin(); iter != b.end(); ++iter){
    double val = (*iter).second;
    if(a.find((*iter).first) == a.end()){
      total += val * val;
    }
  }
  return total;
  //// cosine distance is better for sparse datapoints?
  // double ip = 0.0;
  // double lenA = 0.0;
  // double lenB = 0.0;
  // for(std::map::const_iterator iter = a.begin();
  //     iter != a.end(); ++iter){
  //   size_t id = (*iter).first;
  //   double val = (*iter).second;
  //   if(b.find(id) != b.end()){
  //     ip += val * b.at(id);
  //   }
  //   lenA += val*val;
  // }
  //
  // if(ip == 0.0 || lenA == 0.0)
  //   return 1.0;
  //
  // for(std::map::const_iterator iter = b.begin();
  //     iter != b.end(); ++iter){
  //   double val = (*iter).second;
  //   lenB += val * val;
  // }
  //
  // if(lenB == 1.0)
  //   return 1.0;
  //
  // return 1.0 - ip/(sqrt(lenA)*sqrt(lenB));
}

// helper function to add two dense vectors (a += b)
std::vector& plus_equal_vector(std::vector& a, const std::vector& b) {
  ASSERT_EQ(a.size(), b.size());
  for (size_t i = 0;i < a.size(); ++i) {
    a[i] += b[i];
  }
  return a;
}

// helper function to add two sparse vectors (a += b)
std::map& plus_equal_vector(std::map& a, const std::map& b) {
  for(std::map::const_iterator iter = b.begin(); iter != b.end(); ++iter){
    size_t id = (*iter).first;
    double val = (*iter).second;
    if(a.find(id) != a.end()){
      a[id] += b.at(id);
    }else{
      a.insert(std::make_pair(id, val));
    }
  }
  return a;
}

// helper function to scale a dense vector in place
std::vector& scale_vector(std::vector& a, double d) {
  for (size_t i = 0;i < a.size(); ++i) {
    a[i] *= d;
  }
  return a;
}

// helper function to scale a sparse vector in place
std::map& scale_vector(std::map& a, double d) {
  for(std::map::iterator iter = a.begin(); iter != a.end(); ++iter){
    size_t id = (*iter).first;
    double val = (*iter).second;
    a[id] = val*d;
    // (*iter).second *= d;
  }
  return a;
}

// [extraction gap] template arguments (vertex_data, edge_data) stripped
typedef graphlab::distributed_graph graph_type;

// next vertex id to hand out when the input carries no explicit ids;
// initialized per-process in main so processes draw from disjoint ranges
graphlab::atomic NEXT_VID;   // [extraction gap] template argument stripped

// Read a line from a file and creates a vertex (dense, no explicit id)
bool vertex_loader(graph_type& graph, const std::string& fname,
                   const std::string& line) {
  if (line.empty()) return true;
  namespace qi = boost::spirit::qi;
  namespace ascii = boost::spirit::ascii;
  namespace phoenix = boost::phoenix;
  vertex_data vtx;
  const bool success = qi::phrase_parse
    (line.begin(), line.end(),
     // Begin grammar
     (
      (qi::double_[phoenix::push_back(phoenix::ref(vtx.point), qi::_1)] % -qi::char_(",") )
      )
     ,
     // End grammar
     ascii::space);
  if (!success) return false;
  vtx.best_cluster = (size_t)(-1);
  vtx.best_distance = std::numeric_limits::infinity();   // [extraction gap] template argument stripped
  vtx.changed = false;
  graph.add_vertex(NEXT_VID.inc_ret_last(1), vtx);
  return true;
}

// Read a line from a file and creates a vertex
// (sparse "featureid:value" tokens, no explicit vertex id)
bool vertex_loader_sparse(graph_type& graph, const std::string& fname,
                          const std::string& line) {
  if (line.empty()) return true;
  vertex_data vtx;
  boost::char_separator sep(" ");   // [extraction gap] template argument stripped
  boost::tokenizer< boost::char_separator > tokens(line, sep);
  BOOST_FOREACH (const std::string& t, tokens) {
    std::string::size_type pos = t.find(":");
    // NOTE(review): find() returns npos when ':' is absent and npos > 0 is
    // true, so malformed tokens are not rejected here -- confirm upstream.
    if(pos > 0){
      size_t id = (size_t)std::atoi(t.substr(0, pos).c_str());
      double val = std::atof(t.substr(pos+1, t.length() - pos -1).c_str());
      vtx.point_sparse.insert(std::make_pair(id, val));
    }
  }
  vtx.best_cluster = (size_t)(-1);
  vtx.best_distance = std::numeric_limits::infinity();
  vtx.changed = false;
  graph.add_vertex(NEXT_VID.inc_ret_last(1), vtx);
  return true;
}

// Read a line from a file and creates a vertex (dense, line starts with the id)
bool vertex_loader_with_id(graph_type& graph, const std::string& fname,
                           const std::string& line) {
  if (line.empty()) return true;
  size_t id = 0;
  namespace qi = boost::spirit::qi;
  namespace ascii = boost::spirit::ascii;
  namespace phoenix = boost::phoenix;
  vertex_data vtx;
  const bool success = qi::phrase_parse
    (line.begin(), line.end(),
     // Begin grammar
     (
      qi::ulong_[phoenix::ref(id) = qi::_1] >> -qi::char_(",") >>
      (qi::double_[phoenix::push_back(phoenix::ref(vtx.point), qi::_1)] % -qi::char_(",") )
      )
     ,
     // End grammar
     ascii::space);
  if (!success) return false;
  vtx.best_cluster = (size_t)(-1);
  vtx.best_distance = std::numeric_limits::infinity();
  vtx.changed = false;
  graph.add_vertex(id, vtx);
  return true;
}

// Read a line from a file and creates a vertex (sparse, line starts with the id)
bool vertex_loader_with_id_sparse(graph_type& graph, const std::string& fname,
                                  const std::string& line) {
  if (line.empty()) return true;
  vertex_data vtx;
  size_t id = 0;
  boost::char_separator sep(" ");   // [extraction gap] template argument stripped
  boost::tokenizer > tokens(line, sep);   // [extraction gap] template arguments stripped
  bool first = true;
  BOOST_FOREACH (const std::string& t, tokens) {
    if(first){
      id = (size_t)std::atoi(t.c_str());
      first = false;
    }else{
      std::string::size_type pos = t.find(":");
      if(pos > 0){
        // NOTE(review): this inner 'id' (a feature id) shadows the vertex id
        // declared above -- apparently intentional, but easy to misread.
        size_t id = (size_t)std::atoi(t.substr(0, pos).c_str());
        double val = std::atof(t.substr(pos+1, t.length() - pos -1).c_str());
        vtx.point_sparse.insert(std::make_pair(id, val));
      }
    }
  }
  vtx.best_cluster = (size_t)(-1);
  vtx.best_distance = std::numeric_limits::infinity();
  vtx.changed = false;
  graph.add_vertex(id, vtx);
  return true;
}

//call this when edge weight file is given.
//each line should be [source id] [target id] [weight].
//directions of edges are ignored.
bool edge_loader(graph_type& graph, const std::string& filename,
                 const std::string& textline) {
  if (textline.empty()) return true;
  std::stringstream strm(textline);
  size_t source_vid = 0;
  size_t target_vid = 0;
  double weight = 0.0;
  strm >> source_vid;
  strm.ignore(1);
  strm >> target_vid;
  strm.ignore(1);
  strm >> weight;
  if(source_vid != target_vid)   // drop self-loops
    graph.add_edge(source_vid, target_vid, edge_data(weight));
  return true;
}

// A set of Map Reduces to compute the maximum and minimum vector sizes
// to ensure that all vectors have the same length
struct max_point_size_reducer: public graphlab::IS_POD_TYPE {
  size_t max_point_size;
  static max_point_size_reducer get_max_point_size(const graph_type::vertex_type& v) {
    max_point_size_reducer r;
    r.max_point_size = v.data().point.size();
    return r;
  }
  max_point_size_reducer& operator+=(const max_point_size_reducer& other) {
    max_point_size = std::max(max_point_size, other.max_point_size);
    return *this;
  }
};

struct min_point_size_reducer: public graphlab::IS_POD_TYPE {
  size_t min_point_size;
  static min_point_size_reducer get_min_point_size(const graph_type::vertex_type& v) {
    min_point_size_reducer r;
    r.min_point_size = v.data().point.size();
    return r;
  }
  min_point_size_reducer& operator+=(const min_point_size_reducer& other) {
    min_point_size = std::min(min_point_size, other.min_point_size);
    return *this;
  }
};

/*
 * This transform vertices call is only used during
 * the initialization phase. It computes distance to
 * cluster[KMEANS_INITIALIZATION] and assigns itself
 * to the new cluster KMEANS_INITIALIZATION if the new distance
 * is smaller that its previous cluster assignment
 */
void kmeans_pp_initialization(graph_type::vertex_type& v) {
  double d = sqr_distance(v.data().point,
                          CLUSTERS[KMEANS_INITIALIZATION].center);
  if (v.data().best_distance > d) {
    v.data().best_distance = d;
    v.data().best_cluster = KMEANS_INITIALIZATION;
  }
}

// sparse-vector variant of kmeans_pp_initialization
void kmeans_pp_initialization_sparse(graph_type::vertex_type& v) {
  double d = sqr_distance(v.data().point_sparse,
                          CLUSTERS[KMEANS_INITIALIZATION].center_sparse);
  if (v.data().best_distance > d) {
    v.data().best_distance = d;
    v.data().best_cluster = KMEANS_INITIALIZATION;
  }
}

/*
 * Draws a random sample from the data points that is
 * proportionate to the "best distance" stored in the vertex.
 */
struct random_sample_reducer {
  std::vector vtx;   // [extraction gap] element type stripped
  double weight;
  random_sample_reducer():weight(0) { }
  random_sample_reducer(const std::vector& vtx, double weight):vtx(vtx),weight(weight) { }
  static random_sample_reducer get_weight(const graph_type::vertex_type& v) {
    if (v.data().best_cluster == (size_t)(-1)) {
      return random_sample_reducer(v.data().point, 1);
    }
    else {
      return random_sample_reducer(v.data().point, v.data().best_distance);
    }
  }
  // weighted merge: keeps one of the two candidate points with probability
  // proportional to its accumulated weight
  random_sample_reducer& operator+=(const random_sample_reducer& other) {
    double totalweight = weight + other.weight;
    // if any weight is too small, just quit
    if (totalweight <= 0) return *this;
    double myp = weight / (weight + other.weight);
    if (graphlab::random::bernoulli(myp)) {
      weight += other.weight;
      return *this;
    }
    else {
      vtx = other.vtx;
      weight += other.weight;
      return *this;
    }
  }
  void save(graphlab::oarchive &oarc) const { oarc << vtx << weight; }
  void load(graphlab::iarchive& iarc) { iarc >> vtx >> weight; }
};

// sparse-vector variant of random_sample_reducer
struct random_sample_reducer_sparse{
  std::map vtx;   // [extraction gap] key/value types stripped
  double weight;
  random_sample_reducer_sparse():weight(0) { }
  random_sample_reducer_sparse(const std::map& vtx, double weight):vtx(vtx),weight(weight) { }
  static random_sample_reducer_sparse get_weight(const graph_type::vertex_type& v) {
    if (v.data().best_cluster == (size_t)(-1)) {
      return random_sample_reducer_sparse(v.data().point_sparse, 1);
    }
    else {
      return random_sample_reducer_sparse(v.data().point_sparse, v.data().best_distance);
    }
  }
  random_sample_reducer_sparse& operator+=(const random_sample_reducer_sparse& other) {
    double totalweight = weight + other.weight;
    // if any weight is too small, just quit
    if (totalweight <= 0) return *this;
    double myp = weight / (weight + other.weight);
    if (graphlab::random::bernoulli(myp)) {
      weight += other.weight;
      return *this;
    }
    else {
      vtx = other.vtx;
      weight += other.weight;
      return *this;
    }
  }
  void save(graphlab::oarchive &oarc) const { oarc << vtx << weight; }
  void load(graphlab::iarchive& iarc) { iarc >> vtx >> weight; }
};

/*
 * This transform vertices call is used during the
 * actual k-means iteration. It computes distance to
 * all "changed" clusters and reassigns itself if necessary
 */
void kmeans_iteration(graph_type::vertex_type& v) {
  // if current vertex's cluster was modified, we invalidate the distance.
  // and we need to recompute to all existing clusters
  // otherwise, we just need to recompute to changed cluster centers.
  size_t prev_asg = v.data().best_cluster;
  if (CLUSTERS[v.data().best_cluster].changed) {
    // invalidate. recompute to all
    v.data().best_cluster = (size_t)(-1);
    v.data().best_distance = std::numeric_limits::infinity();
    for (size_t i = 0;i < NUM_CLUSTERS; ++i) {
      if (CLUSTERS[i].center.size() > 0 || CLUSTERS[i].center_sparse.size() > 0) {
        double d = 0.0;
        if(IS_SPARSE == true)
          d = sqr_distance(v.data().point_sparse, CLUSTERS[i].center_sparse);
        else
          d = sqr_distance(v.data().point, CLUSTERS[i].center);
        if (d < v.data().best_distance) {
          v.data().best_distance = d;
          v.data().best_cluster = i;
        }
      }
    }
  }
  else {
    // just compute distance to what has changed
    for (size_t i = 0;i < NUM_CLUSTERS; ++i) {
      if (CLUSTERS[i].changed &&
          (CLUSTERS[i].center.size() > 0 || CLUSTERS[i].center_sparse.size() > 0)) {
        double d = 0.0;
        if(IS_SPARSE == true)
          d = sqr_distance(v.data().point_sparse, CLUSTERS[i].center_sparse);
        else
          d= sqr_distance(v.data().point, CLUSTERS[i].center);
        if (d < v.data().best_distance) {
          v.data().best_distance = d;
          v.data().best_cluster = i;
        }
      }
    }
  }
  v.data().changed = (prev_asg != v.data().best_cluster);
}

// gathered information: cluster id -> accumulated edge weight of neighbours
// currently assigned to that cluster; used when edge weight file is given
struct neighbor_info {
  std::map cw_map;   // [extraction gap] key/value types stripped
  neighbor_info() : cw_map() { }
  neighbor_info(size_t clst, double weight) : cw_map() {
    cw_map.insert(std::make_pair(clst, weight));
  }
  neighbor_info& operator+=(const neighbor_info& other) {
    for (std::map::const_iterator iter = other.cw_map.begin();
         iter != other.cw_map.end(); iter++) {
      size_t clst = iter->first;
      if (cw_map.find(clst) == cw_map.end()) {
        cw_map.insert(std::make_pair(clst, iter->second));
      }
      else {
        cw_map[clst] += iter->second;
      }
    }
    return *this;
  }
  void save(graphlab::oarchive& oarc) const { oarc << cw_map; }
  void load(graphlab::iarchive& iarc) { iarc >> cw_map; }
};

// assignment step that also rewards agreeing with weighted neighbours;
// used when edge weight file is given
class cluster_assignment: public graphlab::ivertex_program, public graphlab::IS_POD_TYPE {
  // [extraction gap] ivertex_program template arguments stripped above
public:
  //gather on all the edges
  edge_dir_type gather_edges(icontext_type& context,
                             const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  }
  //for each edge gather the weights and the assigned clusters of the neighbors
  neighbor_info gather(icontext_type& context, const vertex_type& vertex,
                       edge_type& edge) const {
    if (edge.source().id() == vertex.id()) { //out edge
      return neighbor_info(edge.target().data().best_cluster,
                           edge.data().weight);
    }
    else { //in edge
      return neighbor_info(edge.source().data().best_cluster,
                           edge.data().weight);
    }
  }
  //assign a cluster, considering the clusters of neighbors
  void apply(icontext_type& context, vertex_type& vertex,
             const gather_type& total) {
    size_t past_clst = vertex.data().best_cluster;
    vertex.data().best_cluster = (size_t) (-1);
    vertex.data().best_distance = std::numeric_limits::infinity();
    for (size_t i = 0; i < NUM_CLUSTERS; ++i) {
      if (CLUSTERS[i].center.size() > 0 || CLUSTERS[i].center_sparse.size() > 0) {
        double d = 0.0;
        if(IS_SPARSE == true)
          d = sqr_distance(vertex.data().point_sparse, CLUSTERS[i].center_sparse);
        else
          d = sqr_distance(vertex.data().point, CLUSTERS[i].center);
        // consider neighbors: subtract the accumulated weight of neighbours
        // already in cluster i (the "pairwise reward" for agreeing)
        const std::map& cw_map = total.cw_map;   // [extraction gap] types stripped
        for (std::map::const_iterator iter = cw_map.begin();
             iter != cw_map.end(); iter++) {
          size_t neighbor_cluster = iter->first;
          // NOTE(review): "wieght" typo kept -- renaming is a code change
          double total_wieght = iter->second;
          if (i == neighbor_cluster)
            d -= total_wieght;
        }
        if (d < vertex.data().best_distance) {
          vertex.data().best_distance = d;
          vertex.data().best_cluster = i;
        }
      }
    }
    vertex.data().changed = (past_clst != vertex.data().best_cluster);
  }
  //send signals to the neighbors when the cluster assignment has changed
  edge_dir_type scatter_edges(icontext_type& context,
                              const vertex_type& vertex) const {
    if (vertex.data().changed)
      return graphlab::ALL_EDGES;
    else
      return graphlab::NO_EDGES;
  }
  void scatter(icontext_type& context, const vertex_type& vertex,
               edge_type& edge) const {
  }
};

/*
 * computes new cluster centers
 * Also accumulates a counter counting the number of vertices which
 * assignments changed.
 */
struct cluster_center_reducer {
  std::vector new_clusters;   // [extraction gap] element type stripped
  size_t num_changed;         // how many vertices switched cluster
  double cost;                // sum of best_distance over all vertices
  cluster_center_reducer():new_clusters(NUM_CLUSTERS), num_changed(0), cost(0) { }
  static cluster_center_reducer get_center(const graph_type::vertex_type& v) {
    cluster_center_reducer cc;
    ASSERT_NE(v.data().best_cluster, (size_t)(-1));
    if(IS_SPARSE == true)
      cc.new_clusters[v.data().best_cluster].center_sparse = v.data().point_sparse;
    else
      cc.new_clusters[v.data().best_cluster].center = v.data().point;
    cc.new_clusters[v.data().best_cluster].count = 1;
    cc.num_changed = v.data().changed;
    cc.cost = v.data().best_distance;
    return cc;
  }
  cluster_center_reducer& operator+=(const cluster_center_reducer& other) {
    for (size_t i = 0;i < NUM_CLUSTERS; ++i) {
      if (new_clusters[i].count == 0) new_clusters[i] = other.new_clusters[i];
      else if (other.new_clusters[i].count > 0) {
        if(IS_SPARSE == true)
          plus_equal_vector(new_clusters[i].center_sparse, other.new_clusters[i].center_sparse);
        else
          plus_equal_vector(new_clusters[i].center, other.new_clusters[i].center);
        new_clusters[i].count += other.new_clusters[i].count;
      }
    }
    num_changed += other.num_changed;
    cost += other.cost;
    return *this;
  }
  // [extraction gap] save()/load() lost text between '<' and '>'
  void save(graphlab::oarchive& oarc) const { oarc << new_clusters << num_changed <> new_clusters >> num_changed >> cost; }
};

// writes each dense datapoint followed by its distance and assigned cluster
struct vertex_writer {
  std::string save_vertex(graph_type::vertex_type v) {
    std::stringstream strm;
    for (size_t i = 0;i < v.data().point.size(); ++i) {
      strm << v.data().point[i] << "\t";
    }
    strm << v.data().best_distance << "\t";
    strm << v.data().best_cluster << "\n";
    strm.flush();
    return strm.str();
  }
  std::string save_edge(graph_type::edge_type e) { return ""; }
};

// sparse variant: "id:val" pairs followed by the assigned cluster
struct vertex_writer_sparse {
  std::string save_vertex(graph_type::vertex_type v) {
    std::stringstream strm;
    for(std::map::iterator iter = v.data().point_sparse.begin();
        iter != v.data().point_sparse.end();++iter){
      strm << (*iter).first << ":" << (*iter).second << " ";
    }
    strm << v.data().best_cluster << "\n";
    strm.flush();
    return strm.str();
  }
  std::string save_edge(graph_type::edge_type e) { return ""; }
};

// id variant: "id <tab> cluster+1" per datapoint
struct vertex_writer_with_id {
  std::string save_vertex(graph_type::vertex_type v) {
    std::stringstream strm;
    strm << v.id() << "\t";
    strm << v.data().best_cluster+1 << "\n";
    strm.flush();
    return strm.str();
  }
  std::string save_edge(graph_type::edge_type e) { return ""; }
};

// NOTE(review): main() continues beyond the end of this extract; only the
// visible prefix is reproduced here.
int main(int argc, char** argv) {
  std::cout << "Computes a K-means clustering of data.\n\n";
  graphlab::command_line_options clopts
    ("K-means clustering. The input data file is provided by the "
     "--data argument which is non-optional. The format of the data file is a "
     "collection of lines, where each line contains a comma or white-space "
     "separated lost of numeric values representing a vector. Every line "
     "must have the same number of values. The required --clusters=N "
     "argument denotes the number of clusters to generate. To store the output "
     "see the --output-cluster and --output-data arguments");
  std::string datafile;
  std::string outcluster_file;
  std::string outdata_file;
  std::string edgedata_file;
  size_t MAX_ITERATION = 0;
  bool use_id = false;
  clopts.attach_option("data", datafile,
                       "Input file. Each line holds a white-space or comma separated numeric vector");
  clopts.attach_option("clusters", NUM_CLUSTERS,
                       "The number of clusters to create.");
  clopts.attach_option("output-clusters", outcluster_file,
                       "If set, will write a file containing cluster centers "
                       "to this filename. This must be on the local filesystem "
                       "and must be accessible to the root node.");
  clopts.attach_option("output-data", outdata_file,
                       "If set, will output a copy of the input data with an additional "
                       "two columns. The first added column is the distance to assigned "
                       "center and the last is the assigned cluster centers. The output "
                       "will be written to a sequence of filenames where each file is "
                       "prefixed by this value. This may be on HDFS.");
  clopts.attach_option("sparse", IS_SPARSE,
                       "If set to true, will use a sparse vector representation."
                       "The file format is [feature id]:[value] [feature id]:[value] ..."
                       ", where [feature id] must be positive integer or zero.");
  clopts.attach_option("id", use_id,
                       "If set to true, will use ids for data points. The id of a data point "
                       "must be written at the head of each line of the input data. "
                       "The output data will consist of two columns: the first one "
                       "denotes the ids; the second one denotes the assigned clusters.");
  clopts.attach_option("pairwise-reward", edgedata_file,
                       "If set, will consider pairwise rewards when clustering. "
                       "Each line of the file beginning with the argument holds [id1] [id2] "
                       "[reward]. This mode must be used with --id option.");
  clopts.attach_option("max-iteration", MAX_ITERATION,
                       "The max number of iterations");
  if(!clopts.parse(argc, argv)) return EXIT_FAILURE;
  if (datafile == "") {
    std::cout << "--data is not optional\n";
    return EXIT_FAILURE;
  }
  if (NUM_CLUSTERS == 0) {
    std::cout << "--clusters is not optional\n";
    return EXIT_FAILURE;
  }
  if(edgedata_file.size() > 0){
    if(use_id == false){
      std::cout << "--id is not optional when you use edge data\n";
      return EXIT_FAILURE;
    }
  }
  graphlab::mpi_tools::init(argc, argv);
  graphlab::distributed_control dc;
  // load graph
  graph_type graph(dc, clopts);
  // give each process a disjoint range of auto-assigned vertex ids
  NEXT_VID = (((graphlab::vertex_id_type)1 << 31) / dc.numprocs()) * dc.procid();
  if(IS_SPARSE == true){
    if(use_id){
      graph.load(datafile, vertex_loader_with_id_sparse);
    }else{
      graph.load(datafile, vertex_loader_sparse);
    }
  }else{
    if(use_id){
      graph.load(datafile, vertex_loader_with_id);
    }else{
      graph.load(datafile, vertex_loader);
    }
  }
  if(edgedata_file.size() > 0){
    graph.load(edgedata_file, edge_loader);
  }
  graph.finalize();
  dc.cout() << "Number of datapoints: " << graph.num_vertices() << std::endl;
  if (graph.num_vertices() < NUM_CLUSTERS) {
    dc.cout() << "More clusters than datapoints! Cannot proceed" << std::endl;
    return EXIT_FAILURE;
  }
  dc.cout() << "Validating data...";
  CLUSTERS.resize(NUM_CLUSTERS);
  // make sure all have the same array length
  if(IS_SPARSE == false){
    size_t max_p_size = graph.map_reduce_vertices
      (max_point_size_reducer::get_max_point_size).max_point_size;
    size_t min_p_size = graph.map_reduce_vertices
      (min_point_size_reducer::get_min_point_size).min_point_size;
    if (max_p_size != min_p_size) {
      dc.cout() << "Data has dimensionality ranging from " << min_p_size
                << " to " << max_p_size << "! K-means cannot proceed!" << std::endl;
      return EXIT_FAILURE;
    }
    // allocate clusters
    for (size_t i = 0;i < NUM_CLUSTERS; ++i) {
      CLUSTERS[i].center.resize(max_p_size);
    }
  }
  dc.cout() << "Initializing using Kmeans++\n";
  // ok.
perform kmeans++ initialization for (KMEANS_INITIALIZATION = 0; KMEANS_INITIALIZATION < NUM_CLUSTERS; ++KMEANS_INITIALIZATION) { if(IS_SPARSE == true){ random_sample_reducer_sparse rs = graph.map_reduce_vertices (random_sample_reducer_sparse::get_weight); CLUSTERS[KMEANS_INITIALIZATION].center_sparse = rs.vtx; graph.transform_vertices(kmeans_pp_initialization_sparse); }else{ random_sample_reducer rs = graph.map_reduce_vertices (random_sample_reducer::get_weight); CLUSTERS[KMEANS_INITIALIZATION].center = rs.vtx; graph.transform_vertices(kmeans_pp_initialization); } } // "reset" all clusters for (size_t i = 0; i < NUM_CLUSTERS; ++i) CLUSTERS[i].changed = true; // perform Kmeans iteration dc.cout() << "Running Kmeans...\n"; bool clusters_changed = true; size_t iteration_count = 0; while(clusters_changed) { if(MAX_ITERATION > 0 && iteration_count >= MAX_ITERATION) break; cluster_center_reducer cc = graph.map_reduce_vertices (cluster_center_reducer::get_center); // the first round (iteration_count == 0) is not so meaningful // since I am just recomputing the centers from the output of the KMeans++ // initialization if (iteration_count > 0) { dc.cout() << "Kmeans iteration " << iteration_count << ": " << "# points with changed assignments = " << cc.num_changed << " total cost: " << cc.cost << std::endl; } for (size_t i = 0;i < NUM_CLUSTERS; ++i) { double d = cc.new_clusters[i].count; if(IS_SPARSE){ if (d > 0) scale_vector(cc.new_clusters[i].center_sparse, 1.0 / d); if (cc.new_clusters[i].count == 0 && CLUSTERS[i].count > 0) { dc.cout() << "Cluster " << i << " lost" << std::endl; CLUSTERS[i].center_sparse.clear(); CLUSTERS[i].count = 0; CLUSTERS[i].changed = false; } else { CLUSTERS[i] = cc.new_clusters[i]; CLUSTERS[i].changed = true; } }else{ if (d > 0) scale_vector(cc.new_clusters[i].center, 1.0 / d); if (cc.new_clusters[i].count == 0 && CLUSTERS[i].count > 0) { dc.cout() << "Cluster " << i << " lost" << std::endl; CLUSTERS[i].center.clear(); CLUSTERS[i].count = 0; 
CLUSTERS[i].changed = false; } else { CLUSTERS[i] = cc.new_clusters[i]; CLUSTERS[i].changed = true; } } } clusters_changed = iteration_count == 0 || cc.num_changed > 0; if(edgedata_file.size() > 0){ clopts.engine_args.set_option("factorized", true); graphlab::omni_engine engine(dc, graph, "async", clopts); engine.signal_all(); engine.start(); }else{ graph.transform_vertices(kmeans_iteration); } ++iteration_count; } if (!outcluster_file.empty() && dc.procid() == 0) { dc.cout() << "Writing Cluster Centers..." << std::endl; std::ofstream fout(outcluster_file.c_str()); if(IS_SPARSE){ for (size_t i = 0;i < NUM_CLUSTERS; ++i) { if(use_id) fout << i+1 << "\t"; for (std::map::iterator iter = CLUSTERS[i].center_sparse.begin(); iter != CLUSTERS[i].center_sparse.end();++iter) { fout << (*iter).first << ":" << (*iter).second << " "; } fout << "\n"; } }else{ for (size_t i = 0;i < NUM_CLUSTERS; ++i) { if(use_id) fout << i+1 << "\t"; for (size_t j = 0; j < CLUSTERS[i].center.size(); ++j) { fout << CLUSTERS[i].center[j] << " "; } fout << "\n"; } } } if (!outdata_file.empty()) { dc.cout() << "Writing Data with cluster assignments...\n" << std::endl; if(use_id){ graph.save(outdata_file, vertex_writer_with_id(), false, true, false, 1); }else{ if(IS_SPARSE == true) graph.save(outdata_file, vertex_writer_sparse(), false, true, false, 1); else graph.save(outdata_file, vertex_writer(), false, true, false, 1); } } graphlab::mpi_tools::finalize(); } ================================================ FILE: toolkits/clustering/spectral_clustering.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include #include #include #include #include #include #include #include #include #include #include #include //remove assigned options from arguments std::string get_arg_str_without(int argc, char** argv, std::vector remove_opts) { std::stringstream strm; bool skip_next = false; for (int i = 1; i < argc; ++i) { bool skip = false; for (size_t j = 0; j < remove_opts.size(); ++j) { std::string with_equal = remove_opts[j] + "="; if (strncmp(with_equal.c_str(), argv[i], with_equal.size()) == 0) { skip = true; } else if (strncmp(remove_opts[j].c_str(), argv[i], remove_opts[j].size()) == 0) { skip = true; skip_next = true; } } if (skip == false && skip_next == false) { strm << argv[i] << " "; } else if (skip == false && skip_next == true) { skip_next = false; } } return strm.str(); } bool call_graph_laplacian_construction(const std::string& mpi_args, const std::string& filename, const float sigma, const float epsilon, const size_t num_nearests, const std::string& args) { std::stringstream strm; if (mpi_args.length() > 0) strm << "mpiexec " << mpi_args << " "; strm << "./graph_laplacian_for_sc "; strm << " --data=" << filename; strm << " --sigma=" << sigma; strm << " --similarity-thres=" << epsilon; strm << " --t-nearest=" << num_nearests; strm << " " << args; std::cout << "CALLING >" << strm.str() << std::endl; int sys_ret = system(strm.str().c_str()); if (sys_ret != 0) { std::cout << "system call fails" << std::endl; return false; } return true; } void make_initial_vector_file(const std::string& filename, const size_t num_data){ std::ofstream ofs((filename + 
".init").c_str()); for(size_t i=0;i 0) strm << "mpiexec " << mpi_args << " "; strm << svd_dir << "svd " + filename + ".glap"; strm << " --rows=" << num_data+1; strm << " --cols=" << num_data; strm << " --nsv=" << num_clusters; strm << " --nv=" << rank; // strm << " --tol=1e-10"; // strm << " --max_iter=20"; strm << " --quiet=1"; strm << " --input_file_offset=1"; strm << " --save_vectors=1"; strm << " --ortho_repeats=3"; // strm << " --id=1"; strm << " --prediction=" << filename; strm << " --initial_vector=" << filename + ".init"; strm << " " << args; std::cout << "CALLING >" << strm.str() << std::endl; int sys_ret = system(strm.str().c_str()); if (sys_ret != 0) { std::cout << "system call fails" << std::endl; return false; } return true; } bool call_eigen_vector_normalization(const std::string& mpi_args, const std::string& filename, const std::string& graph_analytics_dir, const size_t num_clusters, const size_t rank, const size_t num_data, const std::string& args) { std::stringstream strm; if (mpi_args.length() > 0) strm << "mpiexec " << mpi_args << " "; strm << graph_analytics_dir << "eigen_vector_normalization"; strm << " --data=" << filename; strm << " --clusters=" << num_clusters; strm << " --rank=" << rank; strm << " --data-num=" << num_data; strm << " " << args; std::cout << "CALLING >" << strm.str() << std::endl; int sys_ret = system(strm.str().c_str()); if (sys_ret != 0) { std::cout << "system call fails" << std::endl; return false; } return true; } bool call_kmeans(const std::string& mpi_args, const std::string& filename, const std::string& kmeans_dir, const size_t num_clusters, const std::string& args) { //call svd std::stringstream strm; if (mpi_args.length() > 0) strm << "mpiexec " << mpi_args << " "; strm << kmeans_dir << "kmeans "; strm << " --data " << filename << ".compressed"; strm << " --clusters " << num_clusters; strm << " --output-data " << filename << ".result"; strm << " --id=1"; strm << " " << args; std::cout << "CALLING >" << strm.str() << 
std::endl; int sys_ret = system(strm.str().c_str()); if (sys_ret != 0) { std::cout << "system call fails" << std::endl; return false; } return true; } bool call_kmeans_as_preprocess(const std::string& mpi_args, const std::string& filename, const std::string& kmeans_dir, const size_t num_clusters, const std::string& args) { //call svd std::stringstream strm; if (mpi_args.length() > 0) strm << "mpiexec " << mpi_args << " "; strm << kmeans_dir << "kmeans "; strm << " --data " << filename; strm << " --clusters " << num_clusters; strm << " --output-data " << filename << ".pre.labels"; strm << " --output-clusters " << filename << ".pre.centers"; strm << " --id=1"; strm << " " << args; std::cout << "CALLING >" << strm.str() << std::endl; int sys_ret = system(strm.str().c_str()); if (sys_ret != 0) { std::cout << "system call fails" << std::endl; return false; } return true; } //select good value of rank (TODO) int get_lanczos_rank(const size_t num_clusters, const size_t num_data) { size_t rank = 1; if (num_data < 1000) { if (num_clusters + 10 <= num_data) rank = num_clusters + 10; else rank = num_data; } else if (num_data < 10000) { rank = num_clusters + 100; } else if (num_data < 100000) { rank = num_clusters + 150; } else if (num_data < 1000000) { rank = num_clusters + 200; } else { rank = num_clusters + 300; } return rank; // return num_clusters+2; } void read_pairs_with_prefix(std::vector >& ret, const std::string& prefix){ std::string directory_name; std::string original_path(prefix); boost::filesystem::path path(prefix); std::string search_prefix; if (boost::filesystem::is_directory(path)) { // if this is a directory // force a "/" at the end of the path // make sure to check that the path is non-empty. (you do not // want to make the empty path "" the root path "/" ) directory_name = path.native(); } else { directory_name = path.parent_path().native(); search_prefix = path.filename().native(); directory_name = (directory_name.empty() ? "." 
: directory_name); } std::vector files; graphlab::fs_util::list_files_with_prefix(directory_name, search_prefix, files); if (files.size() == 0) { logstream(LOG_WARNING) << "No files found matching " << original_path << std::endl; } for(size_t i = 0; i < files.size(); ++i) { std::ifstream ifs(files[i].c_str()); if (!ifs) { std::cout << "can't read " << files[i] << std::endl; return; } while( !ifs.eof() ) { std::vector pair; size_t id = 0; size_t label = 0; ifs >> id; // ifs.ignore(1); ifs >> label; if(id > 0 && label > 0){ pair.push_back(id); pair.push_back(label); ret.push_back(pair); } } } } int recover_labels(const std::string& prefix){ const std::string kmeans_result_prefix = prefix + ".pre.labels"; const std::string spectral_result_prefix = prefix + ".pre.centers.result"; const std::string outfile = prefix + ".result_1_of_1"; std::vector > kmeans_result; read_pairs_with_prefix(kmeans_result, kmeans_result_prefix); std::vector > spectral_result; read_pairs_with_prefix(spectral_result, spectral_result_prefix); std::map label_map; for(size_t i=0;i > times; time(&start); std::string datafile; std::string graph_analytics_dir = "../graph_analytics/"; std::string svd_dir = "../collaborative_filtering/"; std::string kmeans_dir = "./"; std::string mpi_args; size_t num_clusters = 0; size_t num_nearests = 30; float sigma = 1.0; float epsilon = 0.0; size_t pre_kmeans_clusters = 0; size_t sv = 0; //parse command line graphlab::command_line_options clopts( "Spectral clustering. The input data file is provided by the " "--data argument which is non-optional. The format of the data file is a " "collection of lines, where each line contains a data id followed by a " "comma or white-space separated list of numeric values representing a vector. " "Every line must have the same number of values. The required --clusters=N " "argument denotes the number of clusters to generate."); clopts.attach_option("data", datafile, "Input file. 
Each line holds a data id followed by a white-space " "or comma separated numeric vector"); clopts.attach_option("clusters", num_clusters, "The number of clusters to create"); clopts.attach_option("sigma", sigma, "Scale parameter for Gaussian kernel"); clopts.attach_option("t-nearest", num_nearests, "Number of nearest neighbors (=t). Will use only the t-nearest similarities " "for each datapoint. If set at 0, will use all similarities."); clopts.attach_option("similarity-thres", epsilon, "Threshold to discard small similarities"); clopts.attach_option("svd-dir", svd_dir, "Path to the directory where Graphlab svd is located"); clopts.attach_option("kmeans-dir", kmeans_dir, "Path to the directory where Graphlab kmeans is located"); clopts.attach_option("graph-analytics-dir", graph_analytics_dir, "Path to the directory where Graphlab eigen_vector_normalization is located"); clopts.attach_option("pre-kmeans-clusters", pre_kmeans_clusters, "If set, will perform kmeans as a preprocess with the given cluster number."); clopts.attach_option("mpi-args", mpi_args, "If set, will execute mipexec with the given arguments. 
" "For example, --mpi-args=\"-n [N machines] --hostfile [host file]\""); clopts.attach_option("sv", sv, "Number of vectors in each iteration in the Lanczos svd."); if (!clopts.parse(argc, argv)) return EXIT_FAILURE; if (datafile == "") { std::cout << "--data is not optional\n"; return EXIT_FAILURE; } if (num_clusters == 0) { std::cout << "--cluster is not optional\n"; return EXIT_FAILURE; } std::vector remove_opts; remove_opts.push_back("--data"); remove_opts.push_back("--svd-dir"); remove_opts.push_back("--graph-analytics-dir"); remove_opts.push_back("--kmeans-dir"); remove_opts.push_back("--clusters"); remove_opts.push_back("--sigma"); remove_opts.push_back("--similarity-thres"); remove_opts.push_back("--mpi-args"); remove_opts.push_back("--t-nearest"); remove_opts.push_back("--pre-kmeans-clusters"); remove_opts.push_back("--sv"); std::string other_args = get_arg_str_without(argc, argv, remove_opts); //preprocess by kmeans for fast clustering if(pre_kmeans_clusters > 0){ if(pre_kmeans_clusters < num_clusters){ std::cout << "the number of --pre-kmeans-clusters must be bigger than the number of clusters\n"; return EXIT_FAILURE; } time(&mid); if(call_kmeans_as_preprocess(mpi_args, datafile, kmeans_dir, pre_kmeans_clusters, other_args) == false) return EXIT_FAILURE; //modify settings datafile = datafile + ".pre.centers"; num_nearests = 0; time(&end); times.push_back(std::pair("kmeans preprocess",(end - mid))); } //construct graph laplacian time(&mid); if (call_graph_laplacian_construction(mpi_args, datafile, sigma, epsilon, num_nearests, other_args) == false) { return EXIT_FAILURE; } time(&end); times.push_back(std::pair("graph laplacian",(end - mid))); //eigen value decomposition //read number of data size_t num_data = 0; const std::string datanum_filename = datafile + ".datanum"; std::ifstream ifs(datanum_filename.c_str()); if (!ifs) { std::cout << "can't read number of data." 
<< std::endl; return EXIT_FAILURE; } ifs >> num_data; //determine the sv of Lanczos method if(sv == 0){ sv = get_lanczos_rank(num_clusters, num_data); }else{ if(sv < num_clusters) sv = num_clusters; } time(&mid); if (call_svd(mpi_args, datafile, svd_dir, num_clusters, sv, num_data, other_args) == false) { return EXIT_FAILURE; } if (call_eigen_vector_normalization(mpi_args, datafile, graph_analytics_dir, num_clusters, sv, num_data, other_args) == false) { return EXIT_FAILURE; } time(&end); times.push_back(std::pair("eigen decomposition",(end - mid))); //run kmeans time(&mid); if (call_kmeans(mpi_args, datafile, kmeans_dir, num_clusters, other_args) == false) { return EXIT_FAILURE; } time(&end); times.push_back(std::pair("kmeans",(end - mid))); //recover cluster membership if preprocess with kmeans was done if(pre_kmeans_clusters > 0){ //remove ".pre.centers" datafile = datafile.substr(0, datafile.size() - 12); recover_labels(datafile); } time(&end); std::cout << "computation times:\n"; for(size_t i=0;i #include #include #include "stats.hpp" #include "cdf.hpp" //when using negative node id range, we are not allowed to use //0 and 1 so we add 2. const static int SAFE_NEG_OFFSET=2; const double pi = 3.14159265; const double gaussian_normalization = 1/sqrt(2 * pi); double beta = 1; bool debug = false; enum data_role_type { TRAIN = 0, VALIDATE = 1, PREDICT =2 }; data_role_type mode; /** */ struct vertex_data : graphlab::IS_POD_TYPE{ int y; float xT_mu; float sigma; float predict; float err; float likelihood; float weights; data_role_type type; vertex_data() { xT_mu = 0; y = 0; sigma = 1; predict = 0; err = 0; likelihood = 0; weights = 0; type = TRAIN; } }; // end of vertex data /** * \brief The edge data stores the entry in the matrix. * * In addition the edge data adpredictoro stores the most recent error estimate. 
*/ struct edge_data : public graphlab::IS_POD_TYPE { /** * \brief The type of data on the edge; * * \li *Train:* the observed value is correct and used in training * \li *Validate:* the observed value is correct but not used in training * \li *Predict:* The observed value is not correct and should not be * used in training. */ /** \brief the observed value for the edge */ float x_ij; /** \brief The train/validation/test designation of the edge */ data_role_type role; /** \brief basic initialization */ edge_data(float x_ij = 1, data_role_type role = TRAIN) : x_ij(x_ij), role(role) { } }; // end of edge data /** * \brief The graph type is defined in terms of the vertex and edge * data. */ typedef graphlab::distributed_graph graph_type; /* compute v(t) according to equation (9) left */ double v(double t){ double phit = phi(t); if (phit == 0) phit = 1e-5; double ret = gaussian_normalization * exp(-t*t/2) / phit; if (std::isinf(ret)){ std::cout<<"BUG: " << ret << " " << t << " " << exp(-t*t/2) << " phi(t)" << phi(t) << std::endl; assert(false); } return ret; } /* compute w(t) according to equation (9) right */ double w(double t){ double vt = v(t); return vt * (vt+t); } struct gather_type: public graphlab::IS_POD_TYPE{ float sigma; float mu; float mult_sigma; gather_type(){ sigma = 0; mu = 0; mult_sigma = 1; } gather_type& operator+=(const gather_type& other) { sigma += other.sigma; mu += other.mu; mult_sigma *= other.mult_sigma; return *this; } }; /** compute probability for click as given in equation (2) */ float ctr_predict( const vertex_data& data, const float rating, double & prediction, void * extra = NULL){ assert(beta > 0); prediction = data.xT_mu; double prob = phi(data.xT_mu * data.y / beta); if (debug) //std::cout<<"prediction: " << prediction << " y: " << data.y << std::endl; printf("prediction %12.8lf y: %d \n", prediction, data.y); return prob; } gather_type adpredictor_map(graph_type::edge_type edge, graph_type::vertex_type other) { gather_type ret; 
assert(edge.data().x_ij == 1); /* compute equation (6) */ ret.sigma = edge.data().x_ij * other.data().sigma; ret.mu = edge.data().x_ij * other.data().xT_mu; return ret; } // the function arguments of the combiner must match the return type of the // map function. void adpredictor_combine2(gather_type &a, const gather_type & b, const vertex_data unused) { a.mu += b.mu; a.mult_sigma *= b.mult_sigma; } gather_type adpredictor_map2(graph_type::edge_type edge, graph_type::vertex_type other, vertex_data vertex){ gather_type ret; assert(vertex.sigma > 0); assert(other.data().y == -1 || other.data().y == 1); assert(edge.data().x_ij == 1); double product = other.data().y * other.data().xT_mu / sqrt(other.data().sigma); //assert(product > 0); ret.mu = (other.data().y * edge.data().x_ij * vertex.sigma / sqrt(other.data().sigma)) * v(product); if (std::isinf(ret.mu)){ std::cout<<"BUG: " << ret.mu << " vertex.sigma " << vertex.sigma << " other.data().sigma " << other.data().sigma << " v(prod) " << v(product); assert(false); } double factor = 1.0 - (edge.data().x_ij * vertex.sigma / other.data().sigma)*w(product); if (factor <= 0){ std::cout<<"BUG: " << product << " " << ret.mu << " " << factor < 0); } ret.sigma = factor; return ret; } void adpredictor_update(graph_type::vertex_type vertex) { //go over all row nodes if ( vertex.num_out_edges() > 0){ if (debug) printf("Entered vertex %lu role %d \n", vertex.id(), vertex.data().type); if (vertex.data().type == TRAIN){ vertex_data & row = vertex.data(); row.likelihood = 0; row.err = 0; assert(row.y == -1 || row.y == 1); assert(beta > 0); if (debug) std::cout<<"Entered item " << vertex.id() << " y: " << row.y << std::endl; row.sigma = beta*beta; row.xT_mu = 0; gather_type sum = graphlab::warp::map_reduce_neighborhood(vertex, graphlab::OUT_EDGES, adpredictor_map); row.sigma = sum.sigma; row.xT_mu = sum.mu; double prediction; double ret = ctr_predict(row, row.y, prediction); double predicted_target = prediction < 0 ? 
-1: 1; if ((predicted_target == -1 && row.y == 1) || (predicted_target == 1 && row.y == -1)) row.err += 1.0; if (debug) std::cout<<"Prediction was: " << prediction << " real value: " << row.y << std::endl; row.likelihood += ret; assert(row.sigma > 0); } else if (vertex.data().type == VALIDATE || vertex.data().type == PREDICT){ vertex_data & row = vertex.data(); row.likelihood = 0; row.err = 0; if (vertex.data().type == VALIDATE) assert(row.y == -1 || row.y == 1); gather_type sum = graphlab::warp::map_reduce_neighborhood(vertex, graphlab::OUT_EDGES, adpredictor_map); row.predict = sum.mu; double predict = sum.mu > 0 ? 1 : -1; if (predict != row.y && vertex.data().type == VALIDATE) row.err++; } else assert(false); } } void adpredictor_update2(graph_type::vertex_type vertex) { if (vertex.num_in_edges() > 0){ gather_type sum = graphlab::warp::map_reduce_neighborhood(vertex, graphlab::IN_EDGES, vertex.data(), adpredictor_map2,adpredictor_combine2); vertex.data().sigma *= sum.mult_sigma; vertex.data().xT_mu += sum.mu; } } gather_type count_vertices(const graph_type::vertex_type& vertex) { gather_type ret; if (vertex.data().type == TRAIN){ ret.mu = 1; } else if (vertex.data().type == VALIDATE){ ret.sigma = 1; } return ret; } struct model_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_in_edges() == 0 || vertex.data().type != TRAIN) return ""; std::stringstream strm; strm << vertex.id() << " " << vertex.data().xT_mu << " " << std::endl; return strm.str(); } std::string save_edge(const edge_type& edge) const { return ""; } }; // end of prediction_saver struct prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() == 0 || 
vertex.data().type != PREDICT) return ""; std::stringstream strm; strm << vertex.id() << " " << vertex.data().predict << " " << std::endl; return strm.str(); } std::string save_edge(const edge_type& edge) const { return ""; } }; // end of prediction_saver /** * \brief The graph loader function is a line parser used for * distributed graph construction. */ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { ASSERT_FALSE(line.empty()); // Parse the line std::stringstream strm(line); float weight = 0; float label = 0; strm >> label; if (label != -1 && label != 1) logstream(LOG_FATAL)<<"Each line must have label -1 or 1 as the first item in the row. Row was : " << line << " label: " << label << std::endl; // Determine the role of the data data_role_type role = TRAIN; if(boost::ends_with(filename,".validate")) role = VALIDATE; else if(boost::ends_with(filename, ".predict")) role = PREDICT; int myid = rand(); int num_vals = 0; while (strm.good()) { graphlab::vertex_id_type target; strm >> target; if (strm.fail()) break; char col; strm >> col; if (strm.fail()) break; strm >> weight; if (strm.fail()) break; if (weight != 1) logstream(LOG_FATAL)<<"Currently we support only binary edges. 
Line was: " << line << " in file: " << filename << std::endl; num_vals++; target = -(graphlab::vertex_id_type(target + SAFE_NEG_OFFSET)); graph.add_edge(myid, target, edge_data(weight, role)); } if (num_vals == 0) logstream(LOG_FATAL)<<"Failed to load line: " << line << " in file: " << filename << std::endl; vertex_data data; data.y = label; data.type = role; if (debug) printf("Adding vertex %u with role %d\n", myid, role); graph.add_vertex(myid, data); return true; // successful load } // end of graph_loader int MAX_ITER = 5; gather_type calc_error(const graph_type::vertex_type& vertex) { gather_type ret; if (mode == vertex.data().type){ ret.mu = vertex.data().err; ret.sigma = vertex.data().likelihood; } return ret; } int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "adPredictor algorithm"; graphlab::command_line_options clopts(description); std::string input_dir; std::string save_model; std::string exec_type = "synchronous"; clopts.attach_option("matrix", input_dir, "The directory containing the matrix file"); clopts.add_positional("matrix"); clopts.attach_option("max_iter", MAX_ITER, "The maxumum number of udpates allowed for a vertex"); clopts.attach_option("debug", debug, "debug - additional verbose info"); clopts.attach_option("save_model", save_model, "The prefix (folder and filename) to save predictions."); clopts.attach_option("beta", beta, "gaussian bandwidth"); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." << std::endl; clopts.print_description(); return EXIT_FAILURE; } graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); dc.cout() << "Loading graph. 
Finished in " << timer.current_time() << std::endl; dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. Finished in " << timer.current_time() << std::endl; dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; dc.cout() << "Running adPredictor" << std::endl; dc.cout() << "(C) Code by Danny Bickson, GraphLab Inc. " << std::endl; dc.cout() << "Please send bug reports to danny.bickson@gmail.com" << std::endl; timer.start(); gather_type edge_count = graph.map_reduce_vertices(count_vertices); dc.cout()<<"Training rows: " << edge_count.mu << " validation rows: " << edge_count.sigma << std::endl; if (edge_count.mu <= 0) logstream(LOG_FATAL)<< "Failed to read training data. 
Aborting" << std::endl; graphlab::timer mytimer; mytimer.start(); for (int i = 0; i < MAX_ITER; ++i) { graphlab::warp::parfor_all_vertices(graph, adpredictor_update); graphlab::warp::parfor_all_vertices(graph, adpredictor_update2); mode = TRAIN; gather_type ret = graph.map_reduce_vertices(calc_error); dc.cout() << i << ") Log likelihood: " << std::setw(10) << ret.sigma << " Avg error: " << std::setw(10) << ret.mu/edge_count.mu << std::endl; mode = VALIDATE; ret = graph.map_reduce_vertices(calc_error); dc.cout() << i << " Avg validation error: " << std::setw(10) << ret.mu/edge_count.sigma << std::endl; } const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime << std::endl; // Make predictions --------------------------------------------------------- if(!save_model.empty()) { std::cout << "Saving predictions" << std::endl; const bool gzip_output = false; const bool save_vertices = true; const bool save_edges = false; const size_t threads_per_machine = 1; //save the predictions graph.save(save_model, model_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); graph.save(save_model + ".predict", prediction_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/collaborative_filtering/als.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * \file * * \brief The main file for the ALS matrix factorization algorithm. * * This file contains the main body of the ALS matrix factorization * algorithm. */ #include #include #include #include #include #include // This file defines the serialization code for the eigen types. #include "eigen_serialization.hpp" #include #include #include "stats.hpp" #include const int SAFE_NEG_OFFSET = 2; //add 2 to negative node id //to prevent -0 and -1 which arenot allowed /** * \brief We use the eigen library's vector type to represent * mathematical vectors. */ typedef Eigen::VectorXd vec_type; /** * \brief We use the eigen library's matrix type to represent * matrices. */ typedef Eigen::MatrixXd mat_type; /** * \ingroup toolkit_matrix_factorization * * \brief the vertex data type which contains the latent factor. * * Each row and each column in the matrix corresponds to a different * vertex in the ALS graph. Associated with each vertex is a factor * (vector) of latent parameters that represent that vertex. The goal * of the ALS algorithm is to find the values for these latent * parameters such that the non-zero entries in the matrix can be * predicted by taking the dot product of the row and column factors. */ struct vertex_data { /** * \brief A shared "constant" that specifies the number of latent * values to use. */ static size_t NLATENT; /** \brief The number of times this vertex has been updated. */ uint32_t nupdates; /** \brief The most recent L1 change in the factor value */ float residual; //! 
how much the latent value has changed /** \brief The latent factor for this vertex */ vec_type factor; /** * \brief Simple default constructor which randomizes the vertex * data */ vertex_data() : nupdates(0), residual(1) { randomize(); } /** \brief Randomizes the latent factor */ void randomize() { factor.resize(NLATENT); factor.setRandom(); } /** \brief Save the vertex data to a binary archive */ void save(graphlab::oarchive& arc) const { arc << nupdates << residual << factor; } /** \brief Load the vertex data from a binary archive */ void load(graphlab::iarchive& arc) { arc >> nupdates >> residual >> factor; } }; // end of vertex data size_t vertex_data::NLATENT = 20; /** * \brief The edge data stores the entry in the matrix. * * In addition the edge data also stores the most recent error estimate. */ struct edge_data : public graphlab::IS_POD_TYPE { /** * \brief The type of data on the edge; * * \li *Train:* the observed value is correct and used in training * \li *Validate:* the observed value is correct but not used in training * \li *Predict:* The observed value is not correct and should not be * used in training. */ enum data_role_type { TRAIN, VALIDATE, PREDICT }; /** \brief the observed value for the edge */ float obs; /** \brief The train/validation/test designation of the edge */ data_role_type role; /** \brief basic initialization */ edge_data(float obs = 0, data_role_type role = TRAIN) : obs(obs), role(role) { } }; // end of edge data /** * \brief The graph type is defined in terms of the vertex and edge * data. 
*/
// NOTE(review): the template argument list of distributed_graph appears to
// have been stripped from this copy of the file (text following '<' was lost
// during extraction) — presumably <vertex_data, edge_data>; confirm against
// repository history.
typedef graphlab::distributed_graph graph_type;

#include "implicit.hpp"

// Map function for graph.map_reduce_edges: tallies one training or one
// validation edge and records the user/item ids seen on this edge so the
// reduction can track the maxima. Item vertices are stored with negated ids
// offset by SAFE_NEG_OFFSET, so the original item id is recovered by
// negating and subtracting the offset.
stats_info count_edges(const graph_type::edge_type & edge){
  stats_info ret;
  if (edge.data().role == edge_data::TRAIN)
    ret.training_edges = 1;
  else if (edge.data().role == edge_data::VALIDATE)
    ret.validation_edges = 1;
  ret.max_user = (size_t)edge.source().id();
  ret.max_item = (-edge.target().id()-SAFE_NEG_OFFSET);
  return ret;
}

/**
 * \brief Given a vertex and an edge return the other vertex in the
 * edge.
 */
inline graph_type::vertex_type
get_other_vertex(graph_type::edge_type& edge,
                 const graph_type::vertex_type& vertex) {
  return vertex.id() == edge.source().id()? edge.target() : edge.source();
}; // end of get_other_vertex

/**
 * \brief The gather type used to construct XtX and Xty needed for the ALS
 * update
 *
 * To compute the ALS update we need to compute the sum of
 * \code
 * sum: XtX = nbr.factor.transpose() * nbr.factor
 * sum: Xy = nbr.factor * edge.obs
 * \endcode
 * For each of the neighbors of a vertex.
 *
 * To do this in the Gather-Apply-Scatter model the gather function
 * computes and returns a pair consisting of XtX and Xy which are then
 * added. The gather type represents that tuple and provides the
 * necessary gather_type::operator+= operation.
* */ class gather_type { public: /** * \brief Stores the current sum of nbr.factor.transpose() * * nbr.factor */ mat_type XtX; /** * \brief Stores the current sum of nbr.factor * edge.obs */ vec_type Xy; /** \brief basic default constructor */ gather_type() { } /** * \brief This constructor computes XtX and Xy and stores the result * in XtX and Xy */ gather_type(const vec_type& X, const double y) : XtX(X.size(), X.size()), Xy(X.size()) { XtX.triangularView() = X * X.transpose(); Xy = X * y; } // end of constructor for gather type /** \brief Save the values to a binary archive */ void save(graphlab::oarchive& arc) const { arc << XtX << Xy; } /** \brief Read the values from a binary archive */ void load(graphlab::iarchive& arc) { arc >> XtX >> Xy; } /** * \brief Computes XtX += other.XtX and Xy += other.Xy updating this * tuples value */ gather_type& operator+=(const gather_type& other) { if(other.Xy.size() == 0) { ASSERT_EQ(other.XtX.rows(), 0); ASSERT_EQ(other.XtX.cols(), 0); } else { if(Xy.size() == 0) { ASSERT_EQ(XtX.rows(), 0); ASSERT_EQ(XtX.cols(), 0); XtX = other.XtX; Xy = other.Xy; } else { XtX.triangularView() += other.XtX; Xy += other.Xy; } } return *this; } // end of operator+= }; // end of gather type /** * \brief ALS vertex program implements the alternating least squares * algorithm in the Gather-Apply-Scatter abstraction. 
* * The ALS update treats adjacent vertices (rows or columns) as "X" * (independent) values and the edges (matrix entries) as observed "y" * (dependent) values and then updates the current vertex value as a * weight "w" such that: * * y = X * w + noise * * This is accomplished using the following equation: * * w = inv(X' * X) * (X' * y) * * We implement this in the Gather-Apply-Scatter model by: * * 1) Gather: returns the tuple (X' * X, X' * y) * Sum: (aX' * aX, aX * ay) + (bX' * bX, bX * by) = * (aX' * aX + bX' * bX, aX * ay + bX * by) * * 2) Apply: Solves inv(X' * X) * (X' * y) * * 3) Scatter: schedules the update of adjacent vertices if this * vertex has changed sufficiently and the edge is not well * predicted. * * */ class als_vertex_program : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: /** The convergence tolerance */ static double TOLERANCE; static double LAMBDA; static size_t MAX_UPDATES; static double MAXVAL; static double MINVAL; static int REGNORMAL; //regularization type /** The set of edges to gather along */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges /** The gather function computes XtX and Xy */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { if(edge.data().role == edge_data::TRAIN) { const vertex_type other_vertex = get_other_vertex(edge, vertex); return gather_type(other_vertex.data().factor, edge.data().obs); } else return gather_type(); } // end of gather function /** apply collects the sum of XtX and Xy */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& sum) { // Get and reset the vertex data vertex_data& vdata = vertex.data(); // Determine the number of neighbors. 
Each vertex has only in or // out edges depending on which side of the graph it is located if(sum.Xy.size() == 0) { vdata.residual = 0; ++vdata.nupdates; return; } mat_type XtX = sum.XtX; vec_type Xy = sum.Xy; // Add regularization double regularization = LAMBDA; if (REGNORMAL) regularization = LAMBDA*vertex.num_out_edges(); for(int i = 0; i < XtX.rows(); ++i) XtX(i,i) += regularization; // Solve the least squares problem using eigen ---------------------------- const vec_type old_factor = vdata.factor; vdata.factor = XtX.selfadjointView().ldlt().solve(Xy); // Compute the residual change in the factor factor ----------------------- vdata.residual = (vdata.factor - old_factor).cwiseAbs().sum() / XtX.rows(); ++vdata.nupdates; } // end of apply /** The edges to scatter along */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of scatter edges /** Scatter reschedules neighbors */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { edge_data& edata = edge.data(); if(edata.role == edge_data::TRAIN) { const vertex_type other_vertex = get_other_vertex(edge, vertex); const vertex_data& vdata = vertex.data(); const vertex_data& other_vdata = other_vertex.data(); //TODO: // Do we need to cap the prediction value into [min, max] here? 
const double pred = vdata.factor.dot(other_vdata.factor); const float error = std::fabs(edata.obs - pred); const double priority = (error * vdata.residual); // Reschedule neighbors ------------------------------------------------ if( priority > TOLERANCE && other_vdata.nupdates < MAX_UPDATES) context.signal(other_vertex, priority); } } // end of scatter function /** * \brief Signal all vertices on one side of the bipartite graph */ static graphlab::empty signal_left(icontext_type& context, const vertex_type& vertex) { if(vertex.num_out_edges() > 0) context.signal(vertex); return graphlab::empty(); } // end of signal_left }; // end of als vertex program /** * \brief The graph loader function is a line parser used for * distributed graph construction. */ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { ASSERT_FALSE(line.empty()); namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; namespace phoenix = boost::phoenix; // Determine the role of the data edge_data::data_role_type role = edge_data::TRAIN; if(boost::ends_with(filename,".validate")) role = edge_data::VALIDATE; else if(boost::ends_with(filename, ".predict")) role = edge_data::PREDICT; // Parse the line graph_type::vertex_id_type source_id(-1), target_id(-1); float obs(0); const bool success = qi::phrase_parse (line.begin(), line.end(), // Begin grammar ( qi::ulong_[phoenix::ref(source_id) = qi::_1] >> -qi::char_(',') >> qi::ulong_[phoenix::ref(target_id) = qi::_1] >> -(-qi::char_(',') >> qi::float_[phoenix::ref(obs) = qi::_1]) ) , // End grammar ascii::space); if(!success) return false; if(role == edge_data::TRAIN || role == edge_data::VALIDATE){ if (obs < als_vertex_program::MINVAL || obs > als_vertex_program::MAXVAL) logstream(LOG_FATAL)<<"Rating values should be between " << als_vertex_program::MINVAL << " and " << als_vertex_program::MAXVAL << ". 
Got value: " << obs << " [ user: " << source_id << " to item: " < 0) {
// NOTE(review): the text above is corrupted extraction residue — everything
// between graph_loader's LOG_FATAL message and the
// "if (info.validation_edges > 0)" test inside error_aggregator::finalize
// (the rest of graph_loader plus most of the error_aggregator struct) was
// lost where '<'-delimited text was stripped. Recover this span from
// repository history before attempting to build.
      const double validation_error =
        std::sqrt(agg.validation_error / info.validation_edges);
      context.cout() << "\tValidation RMSE: " << validation_error;
    }
    context.cout() << std::endl;
  }
}; // end of error aggregator

/**
 * \brief The prediction saver is used by the graph.save routine to
 * output the final predictions back to the filesystem.
 */
struct prediction_saver {
  typedef graph_type::vertex_type vertex_type;
  typedef graph_type::edge_type edge_type;
  // Vertices produce no prediction output.
  std::string save_vertex(const vertex_type& vertex) const {
    return ""; //nop
  }
  // Emits "user \t item \t prediction" for PREDICT edges only. The raw dot
  // product is clamped into [MINVAL, MAXVAL]; the item id is recovered from
  // the negated, SAFE_NEG_OFFSET-shifted vertex id.
  std::string save_edge(const edge_type& edge) const {
    if(edge.data().role == edge_data::PREDICT) {
      std::stringstream strm;
      double prediction =
        edge.source().data().factor.dot(edge.target().data().factor);
      prediction = std::min(als_vertex_program::MAXVAL, prediction);
      prediction = std::max(als_vertex_program::MINVAL, prediction);
      strm << edge.source().id() << '\t';
      strm << (-edge.target().id() - SAFE_NEG_OFFSET) << '\t';
      strm << prediction << '\n';
      return strm.str();
    } else return "";
  }
}; // end of prediction_saver

// Saves the user-side latent factors (vertices with out-edges), one vertex
// per line.
// NOTE(review): lexical_cast's template argument was stripped in this copy
// (presumably boost::lexical_cast<std::string>) — confirm against history.
struct linear_model_saver_U {
  typedef graph_type::vertex_type vertex_type;
  typedef graph_type::edge_type edge_type;
  /* save the linear model, using the format:
     nodeid factor1 factor2 ... factorNLATENT \n
  */
  std::string save_vertex(const vertex_type& vertex) const {
    if (vertex.num_out_edges() > 0){
      std::string ret = boost::lexical_cast(vertex.id()) + " ";
      for (uint i=0; i< vertex_data::NLATENT; i++)
        ret += boost::lexical_cast(vertex.data().factor[i]) + " ";
      ret += "\n";
      return ret;
    }
    else return "";
  }
  std::string save_edge(const edge_type& edge) const { return ""; }
};

// Saves the item-side latent factors (vertices with no out-edges); ids are
// mapped back via negation and SAFE_NEG_OFFSET on the continuation line.
struct linear_model_saver_V {
  typedef graph_type::vertex_type vertex_type;
  typedef graph_type::edge_type edge_type;
  /* save the linear model, using the format:
     nodeid factor1 factor2 ...
factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() == 0){ std::string ret = boost::lexical_cast(-vertex.id()-SAFE_NEG_OFFSET) + " "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().factor[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; /** * \brief The engine type used by the ALS matrix factorization * algorithm. * * The ALS matrix factorization algorithm currently uses the * synchronous engine. However we plan to add support for alternative * engines in the future. */ typedef graphlab::omni_engine engine_type; int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "Compute the ALS factorization of a matrix."; graphlab::command_line_options clopts(description); std::string input_dir; std::string predictions; size_t interval = 10; std::string exec_type = "synchronous"; clopts.attach_option("matrix", input_dir, "The directory containing the matrix file"); clopts.add_positional("matrix"); clopts.attach_option("D", vertex_data::NLATENT, "Number of latent parameters to use."); clopts.attach_option("max_iter", als_vertex_program::MAX_UPDATES, "The maxumum number of udpates allowed for a vertex"); clopts.attach_option("lambda", als_vertex_program::LAMBDA, "ALS regularization weight"); clopts.attach_option("tol", als_vertex_program::TOLERANCE, "residual termination threshold"); clopts.attach_option("maxval", als_vertex_program::MAXVAL, "max allowed value"); clopts.attach_option("minval", als_vertex_program::MINVAL, "min allowed value"); clopts.attach_option("interval", interval, "The time in seconds between error reports"); clopts.attach_option("predictions", predictions, "The prefix (folder and filename) to save predictions."); 
clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); clopts.attach_option("regnormal", als_vertex_program::REGNORMAL, "regularization type. 1 = weighted according to neighbors num. 0 = no weighting - just lambda"); parse_implicit_command_line(clopts); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." << std::endl; clopts.print_description(); return EXIT_FAILURE; } ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "minval: " << als_vertex_program::MINVAL << std::endl; dc.cout() << "maxval: " << als_vertex_program::MAXVAL << std::endl; dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); dc.cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; if (dc.procid() == 0) add_implicit_edges(implicitratingtype, graph, dc); dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. Finished in " << timer.current_time() << std::endl; if (!graph.num_edges() || !graph.num_vertices()) logstream(LOG_FATAL)<< "Failed to load graph. 
Check your input path: " << input_dir << std::endl; dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; dc.cout() << "Creating engine" << std::endl; engine_type engine(dc, graph, exec_type, clopts); // Add error reporting to the engine const bool success = engine.add_edge_aggregator ("error", error_aggregator::map, error_aggregator::finalize) && engine.aggregate_periodic("error", interval); ASSERT_TRUE(success); // Signal all vertices on the vertices on the left (liberals) engine.map_reduce_vertices(als_vertex_program::signal_left); info = graph.map_reduce_edges(count_edges); dc.cout()<<"Training edges: " << info.training_edges << " validation edges: " << info.validation_edges << std::endl; // Run ALS --------------------------------------------------------- dc.cout() << "Running ALS" << std::endl; timer.start(); engine.start(); const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime << std::endl << "Updates executed: " << engine.num_updates() << std::endl << "Update Rate (updates/second): " << engine.num_updates() / runtime << std::endl; // Compute the final training error ----------------------------------------- dc.cout() << 
"Final error: " << std::endl; engine.aggregate_now("error"); // Make predictions --------------------------------------------------------- if(!predictions.empty()) { std::cout << "Saving predictions" << std::endl; const bool gzip_output = false; const size_t threads_per_machine = 2; //save the predictions graph.save(predictions, prediction_saver(), gzip_output, false, true, threads_per_machine); //save the linear model graph.save(predictions + ".U", linear_model_saver_U(), gzip_output, true, false, threads_per_machine); graph.save(predictions + ".V", linear_model_saver_V(), gzip_output, true, false, threads_per_machine); } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/collaborative_filtering/biassgd.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * \file * * \brief The main file for the BIAS-SGD matrix factorization algorithm. * * This file contains the main body of the BIAS-SGD matrix factorization * algorithm. */ #include #include #include "eigen_serialization.hpp" #include #include typedef Eigen::VectorXd vec_type; typedef Eigen::MatrixXd mat_type; //when using negative node id range, we are not allowed to use //0 and 1 so we add 2. 
// Offset added to negated item-vertex ids: the negative id range cannot use
// 0 and 1 (to avoid -0/-1 ambiguity), so 2 is added before negation.
const static int SAFE_NEG_OFFSET=2;
// File-scope mirror of biassgd_vertex_program::debug; copied in main()
// after command-line parsing.
static bool debug;
// Counts error_aggregator::finalize invocations (finalize prints only on
// every other call).
int iter = 0;

/**
 * \ingroup toolkit_matrix_pvecization
 *
 * \brief the vertex data type which contains the latent pvec.
 *
 * Each row and each column in the matrix corresponds to a different
 * vertex in the BIASSGD graph. Associated with each vertex is a pvec
 * (vector) of latent parameters that represent that vertex. The goal
 * of the BIASSGD algorithm is to find the values for these latent
 * parameters such that the non-zero entries in the matrix can be
 * predicted by taking the dot product of the row and column pvecs.
 */
struct vertex_data {
  /**
   * \brief A shared "constant" that specifies the number of latent
   * values to use.
   */
  static size_t NLATENT;

  /** \brief The number of times this vertex has been updated. */
  uint32_t nupdates;

  /** \brief The latent pvec for this vertex */
  vec_type pvec;

  // Per-vertex rating bias term, added into every prediction alongside the
  // global mean and the factor dot product.
  double bias;

  /**
   * \brief Simple default constructor which randomizes the vertex
   * data (all-ones instead of random when debug is set)
   */
  vertex_data() : nupdates(0), bias(0) {
    if (debug)
      pvec = vec_type::Ones(NLATENT);
    else randomize();
  }

  /** \brief Randomizes the latent pvec */
  void randomize() { pvec.resize(NLATENT); pvec.setRandom(); }

  /** \brief Save the vertex data to a binary archive */
  void save(graphlab::oarchive& arc) const {
    arc << nupdates << pvec << bias;
  }

  /** \brief Load the vertex data from a binary archive */
  void load(graphlab::iarchive& arc) {
    arc >> nupdates >> pvec >> bias;
  }
}; // end of vertex data

/**
 * \brief The edge data stores the entry in the matrix.
 *
 * In addition the edge data also stores the most recent error estimate.
 */
struct edge_data : public graphlab::IS_POD_TYPE {
  /**
   * \brief The type of data on the edge;
   *
   * \li *Train:* the observed value is correct and used in training
   * \li *Validate:* the observed value is correct but not used in training
   * \li *Predict:* The observed value is not correct and should not be
   * used in training.
*/ enum data_role_type { TRAIN, VALIDATE, PREDICT }; /** \brief the observed value for the edge */ float obs; /** \brief The train/validation/test designation of the edge */ data_role_type role; /** \brief basic initialization */ edge_data(float obs = 0, data_role_type role = PREDICT) : obs(obs), role(role) { } }; // end of edge data /** * \brief The graph type is defined in terms of the vertex and edge * data. */ typedef graphlab::distributed_graph graph_type; #include "implicit.hpp" double extract_l2_error(const graph_type::edge_type & edge); /** * \brief Given a vertex and an edge return the other vertex in the * edge. */ inline graph_type::vertex_type get_other_vertex(graph_type::edge_type& edge, const graph_type::vertex_type& vertex) { return vertex.id() == edge.source().id()? edge.target() : edge.source(); }; // end of get_other_vertex /** * */ class gather_type { public: vec_type pvec; //vector of gradient updates double bias; //bias change /** \brief basic default constructor */ gather_type() { } /** */ gather_type(const vec_type& X, double _bias) { pvec = X; bias = _bias; } // end of constructor for gather type /** \brief Save the values to a binary archive */ void save(graphlab::oarchive& arc) const { arc << pvec << bias; } /** \brief Read the values from a binary archive */ void load(graphlab::iarchive& arc) { arc >> pvec >> bias; } /** */ gather_type& operator+=(const gather_type& other) { if (pvec.size() == 0){ pvec = other.pvec; bias = other.bias; return *this; } else if (other.pvec.size() == 0) return *this; //sum up gradient updates pvec += other.pvec; bias += other.bias; return *this; } // end of operator+= }; // end of gather type //typedef gather_type message_type; /** * BIASSGD vertex program type */ class biassgd_vertex_program : public graphlab::ivertex_program { public: /** The convergence tolerance */ static double TOLERANCE; static double LAMBDA; static double GAMMA; static double MAXVAL; static double MINVAL; static double STEP_DEC; static 
bool debug; static size_t MAX_UPDATES; static double GLOBAL_MEAN; static size_t NUM_TRAINING_EDGES; static uint USERS; gather_type pmsg; void save(graphlab::oarchive& arc) const { arc << pmsg; } /** \brief Load the vertex data from a binary archive */ void load(graphlab::iarchive& arc) { arc >> pmsg; } /** The set of edges to gather along */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges /** The gather function computes XtX and Xy */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { //if(edge.data().role == edge_data::TRAIN) { vec_type delta, other_delta; double bias =0, other_bias = 0; //this is a user node if (vertex.num_in_edges() == 0){ vertex_type other_vertex(get_other_vertex(edge, vertex)); vertex_type my_vertex(vertex); //predict rating double pred = biassgd_vertex_program::GLOBAL_MEAN + edge.source().data().bias + edge.target().data().bias + vertex.data().pvec.dot(other_vertex.data().pvec); pred = std::min(pred, biassgd_vertex_program::MAXVAL); pred = std::max(pred, biassgd_vertex_program::MINVAL); //compute the error const float err = (pred - edge.data().obs); if (debug) std::cout<<"entering edge " << (int)edge.source().id() << ":" << (int)edge.target().id() << " err: " << err << " rmse: " << err*err < TOLERANCE && other_vertex.data().nupdates < MAX_UPDATES) context.signal(other_vertex, gather_type(other_delta, other_bias)); } return gather_type(delta, bias); } else return gather_type(delta, bias); } // end of gather function //typedef vec_type message_type; void init(icontext_type& context, const vertex_type& vertex, const message_type& msg) { if (vertex.num_in_edges() > 0){ pmsg = msg; } } /** apply graident updates to feature vector */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& sum) { // Get and reset the vertex data vertex_data& vdata = vertex.data(); if (sum.pvec.size() > 0){ 
vdata.pvec += sum.pvec; vdata.bias += sum.bias; assert(vertex.num_in_edges() == 0); } else if (pmsg.pvec.size() > 0){ vdata.pvec += pmsg.pvec; vdata.bias += pmsg.bias; assert(vertex.num_out_edges() == 0); } ++vdata.nupdates; } // end of apply /** The edges to scatter along */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of scatter edges /** Scatter reschedules neighbors */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { edge_data& edata = edge.data(); if(edata.role == edge_data::TRAIN) { const vertex_type other_vertex = get_other_vertex(edge, vertex); // Reschedule neighbors ------------------------------------------------ if(other_vertex.data().nupdates < MAX_UPDATES) context.signal(other_vertex, gather_type(vec_type::Zero(vertex_data::NLATENT),0)); } } // end of scatter function /** * \brief Signal all vertices on one side of the bipartite graph */ static graphlab::empty signal_left(icontext_type& context, vertex_type& vertex) { if(vertex.num_out_edges() > 0) context.signal(vertex, gather_type(vec_type::Zero(vertex_data::NLATENT),0)); return graphlab::empty(); } // end of signal_left }; // end of biassgd vertex program struct error_aggregator : public graphlab::IS_POD_TYPE { typedef biassgd_vertex_program::icontext_type icontext_type; typedef graph_type::edge_type edge_type; double train_error, validation_error; size_t ntrain, nvalidation; error_aggregator() : train_error(0), validation_error(0), ntrain(0), nvalidation(0) { } error_aggregator& operator+=(const error_aggregator& other) { train_error += other.train_error; assert(!std::isnan(train_error)); validation_error += other.validation_error; ntrain += other.ntrain; nvalidation += other.nvalidation; return *this; } static error_aggregator map(icontext_type& context, const graph_type::edge_type& edge) { error_aggregator agg; if (edge.data().role == edge_data::TRAIN){ agg.train_error = 
extract_l2_error(edge); agg.ntrain = 1; assert(!std::isnan(agg.train_error)); } else if (edge.data().role == edge_data::VALIDATE){ agg.validation_error = extract_l2_error(edge); agg.nvalidation = 1; } return agg; } static void finalize(icontext_type& context, const error_aggregator& agg) { iter++; if (iter%2 == 0) return; ASSERT_GT(agg.ntrain, 0); const double train_error = std::sqrt(agg.train_error / agg.ntrain); assert(!std::isnan(train_error)); context.cout() << std::setw(8) << context.elapsed_seconds() << " " << std::setw(8) << train_error; if(agg.nvalidation > 0) { const double validation_error = std::sqrt(agg.validation_error / agg.nvalidation); context.cout() << " " << std::setw(8) << validation_error; } context.cout() << std::endl; biassgd_vertex_program::GAMMA *= biassgd_vertex_program::STEP_DEC; } }; // end of error aggregator /** * \brief Given an edge compute the error associated with that edge */ double extract_l2_error(const graph_type::edge_type & edge) { double pred = biassgd_vertex_program::GLOBAL_MEAN + edge.source().data().bias + edge.target().data().bias + edge.source().data().pvec.dot(edge.target().data().pvec); pred = std::min(biassgd_vertex_program::MAXVAL, pred); pred = std::max(biassgd_vertex_program::MINVAL, pred); double rmse = (edge.data().obs - pred) * (edge.data().obs - pred); assert(rmse <= pow(biassgd_vertex_program::MAXVAL-biassgd_vertex_program::MINVAL,2)); return rmse; } // end of extract_l2_error struct prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { return ""; //nop } std::string save_edge(const edge_type& edge) const { if (edge.data().role != edge_data::PREDICT) return ""; std::stringstream strm; double prediction = biassgd_vertex_program::GLOBAL_MEAN + edge.source().data().bias + edge.target().data().bias + edge.source().data().pvec.dot(edge.target().data().pvec); prediction = 
std::min(biassgd_vertex_program::MAXVAL, prediction); prediction = std::max(biassgd_vertex_program::MINVAL, prediction); strm << edge.source().id() << '\t' << -edge.target().id()-SAFE_NEG_OFFSET << '\t' << prediction << '\n'; return strm.str(); } }; // end of prediction_saver struct linear_model_saver_U { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() > 0){ std::string ret = boost::lexical_cast(vertex.id()) + " "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().pvec[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; struct linear_model_saver_V { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() == 0){ std::string ret = boost::lexical_cast(-vertex.id()-SAFE_NEG_OFFSET) + " "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().pvec[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; struct linear_model_saver_bias_U { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... 
factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() > 0){ std::string ret = boost::lexical_cast(vertex.id()) + " "; ret += boost::lexical_cast(vertex.data().bias) + "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; struct linear_model_saver_bias_V { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() == 0){ std::string ret = boost::lexical_cast(-vertex.id()-SAFE_NEG_OFFSET) + " "; ret += boost::lexical_cast(vertex.data().bias) + "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; /** * \brief The graph loader function is a line parser used for * distributed graph construction. */ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { // Parse the line std::stringstream strm(line); graph_type::vertex_id_type source_id(-1), target_id(-1); float obs(0); strm >> source_id >> target_id; if (source_id == graph_type::vertex_id_type(-1) || target_id == graph_type::vertex_id_type(-1)){ logstream(LOG_WARNING)<<"Failed to read input line: "<< line << " in file: " << filename << " (or node id is -1). " << std::endl; return true; } // Determine the role of the data edge_data::data_role_type role = edge_data::TRAIN; if(boost::ends_with(filename,".validate")) role = edge_data::VALIDATE; else if(boost::ends_with(filename, ".predict")) role = edge_data::PREDICT; if(role == edge_data::TRAIN || role == edge_data::VALIDATE){ strm >> obs; if (obs < biassgd_vertex_program::MINVAL || obs > biassgd_vertex_program::MAXVAL){ logstream(LOG_WARNING)<<"Rating values should be between " << biassgd_vertex_program::MINVAL << " and " << biassgd_vertex_program::MAXVAL << ". 
Got value: " << obs << " [ user: " << source_id << " to item: " < engine_type; double calc_global_mean(const graph_type::edge_type & edge){ if (edge.data().role == edge_data::TRAIN) return edge.data().obs; else return 0; } size_t count_edges(const graph_type::edge_type & edge){ if (edge.data().role == edge_data::TRAIN) return 1; else return 0; } int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "Compute the bias-SGD factorization of a matrix."; graphlab::command_line_options clopts(description); std::string input_dir; std::string predictions; size_t interval = 0; std::string exec_type = "synchronous"; clopts.attach_option("matrix", input_dir, "The directory containing the matrix file"); clopts.add_positional("matrix"); clopts.attach_option("D", vertex_data::NLATENT, "Number of latent parameters to use."); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); clopts.attach_option("max_iter", biassgd_vertex_program::MAX_UPDATES, "The maxumum number of udpates allowed for a vertex"); clopts.attach_option("lambda", biassgd_vertex_program::LAMBDA, "SGD regularization weight"); clopts.attach_option("gamma", biassgd_vertex_program::GAMMA, "SGD step size"); clopts.attach_option("debug", biassgd_vertex_program::debug, "debug - additional verbose info"); clopts.attach_option("tol", biassgd_vertex_program::TOLERANCE, "residual termination threshold"); clopts.attach_option("maxval", biassgd_vertex_program::MAXVAL, "max allowed value"); clopts.attach_option("minval", biassgd_vertex_program::MINVAL, "min allowed value"); clopts.attach_option("step_dec", biassgd_vertex_program::STEP_DEC, "multiplicative step decrement"); clopts.attach_option("interval", interval, "The time in seconds between error reports"); clopts.attach_option("predictions", predictions, "The prefix 
(folder and filename) to save predictions."); parse_implicit_command_line(clopts); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." << std::endl; clopts.print_description(); return EXIT_FAILURE; } debug = biassgd_vertex_program::debug; // omp_set_num_threads(clopts.get_ncpus()); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); dc.cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; if (dc.procid() == 0) add_implicit_edges(implicitratingtype, graph, dc); dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. Finished in " << timer.current_time() << std::endl; if (!graph.num_edges() || !graph.num_vertices()) logstream(LOG_FATAL)<< "Failed to load graph. Check your input path: " << input_dir << std::endl; dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; dc.cout() << "Creating engine" << std::endl; engine_type engine(dc, graph, exec_type, clopts); // Add error reporting to the engine const bool success = 
engine.add_edge_aggregator ("error", error_aggregator::map, error_aggregator::finalize) && engine.aggregate_periodic("error", interval); ASSERT_TRUE(success); biassgd_vertex_program::GLOBAL_MEAN = graph.map_reduce_edges(calc_global_mean); biassgd_vertex_program::NUM_TRAINING_EDGES = graph.map_reduce_edges(count_edges); biassgd_vertex_program::GLOBAL_MEAN /= biassgd_vertex_program::NUM_TRAINING_EDGES; dc.cout() << "Global mean is: " <(biassgd_vertex_program::signal_left); dc.cout() << "Running Bias-SGD" << std::endl; dc.cout() << "(C) Code by Danny Bickson, CMU " << std::endl; dc.cout() << "Please send bug reports to danny.bickson@gmail.com" << std::endl; dc.cout() << "Time Training Validation" < // IMPLEMENTATION OF GAUSSIAN CFD // TAKEN FROM : http://www.johndcook.com/cpp_phi.html // constants const double phi_a1 = 0.254829592; const double phi_a2 = -0.284496736; const double phi_a3 = 1.421413741; const double phi_a4 = -1.453152027; const double phi_a5 = 1.061405429; const double phi_p = 0.3275911; double phi(double x) { // Save the sign of x int sign = 1; if (x < 0) sign = -1; x = fabs(x)/sqrt(2.0); // A&S formula 7.1.26 double t = 1.0/(1.0 + phi_p*x); double y = 1.0 - (((((phi_a5*t + phi_a4)*t) + phi_a3)*t + phi_a2)*t + phi_a1)*t*exp(-x*x); return 0.5*(1.0 + sign*y); } #endif ================================================ FILE: toolkits/collaborative_filtering/collaborative_filtering.dox ================================================ /** \page collaborative_filtering Collaborative Filtering \brief The collaborative filtering toolkit contains tools for computing a linear model of the data, and predicting missing values based on this linear model. This is useful when computing recommendations for users. The collaborative filtering toolkit is written by Danny Bickson, CMU. Please send any code related questions to our Google group. Any other inquiries can be directed to Danny.Bickson@gmail.com. 
You are more than welcome to visit my applied machine learning blog. \section History In GraphLab v1, the collaborative filtering package was implemented and optimized for a multicore machine. Currently this version is deprecated and no longer supported. If you intend to utilize a single multicore machine it is recommended to take a look at the GraphChi collaborative filtering toolkit, which can scale to datasets with billions of recommendations. In GraphLab v2, the collaborative filtering toolkit is distributed, targeted for a cluster with a few machines, that way we can scale to much larger models. The GraphLab collaborative filtering toolkit 2.1 is under active development. Please contact us if you encounter any issues or would like additional features. \section Algorithms The collaborative filtering toolkit in GraphLab v.2 currently contains: - \ref ALS "Alternating Least Squares (ALS)" \verbatim Yunhong Zhou, Dennis Wilkinson, Robert Schreiber and Rong Pan. Large-Scale Parallel Collaborative Filtering for the Netflix Prize. Proceedings of the 4th international conference on Algorithmic Aspects in Information and Management. Shanghai, China pp. 337-348, 2008. \endverbatim - \ref CCD++ "ALS with parallel coordinate descent (CCD++)" \verbatim H.-F. Yu, C.-J. Hsieh, S. Si, I. S. Dhillon, Scalable Coordinate Descent Approaches to Parallel Matrix Factorization for Recommender Systems. IEEE International Conference on Data Mining(ICDM), December 2012. Steffen Rendle, Zeno Gantner, Christoph Freudenthaler, and Lars Schmidt-Thieme. 2011. Fast context-aware recommendations with factorization machines. In Proceedings of the 34th international ACM SIGIR conference on Research and development in Information Retrieval (SIGIR '11). ACM, New York, NY, USA, 635-644. \endverbatim - \ref SGD "Stochastic gradient descent (SGD)" \verbatim Matrix Factorization Techniques for Recommender Systems Yehuda Koren, Robert Bell, Chris Volinsky In IEEE Computer, Vol. 42, No. 8. 
(07 August 2009), pp. 30-37. Takács, G, Pilászy, I., Németh, B. and Tikk, D. (2009). Scalable Collaborative Filtering Approaches for Large Recommender Systems. Journal of Machine Learning Research, 10, 623-656. \endverbatim - \ref BIAS_SGD "Bias stochastic gradient descent (Bias-SGD)" \verbatim Y. Koren. Factorization Meets the Neighborhood: a Multifaceted Collaborative Filtering Model. In ACM KDD 2008. Equation (5). \endverbatim - \ref SVD_PLUS_PLUS "SVD++" \verbatim Y. Koren. Factorization Meets the Neighborhood: a Multifaceted Collaborative Filtering Model. In ACM KDD 2008. \endverbatim - \ref WALS "Weighted-ALS" \verbatim Collaborative Filtering for Implicit Feedback Datasets Hu, Y.; Koren, Y.; Volinsky, C. IEEE International Conference on Data Mining (ICDM 2008), IEEE (2008). D. Needell, J. A. Tropp CoSaMP: Iterative signal recovery from incomplete and inaccurate samples Applied and Computational Harmonic Analysis, Vol. 26, No. 3. (17 Apr 2008), pp. 301-321. \endverbatim - \ref SALS "Sparse-ALS" \verbatim Xi Chen, Yanjun Qi, Bing Bai, Qihang Lin and Jaime Carbonell. Sparse Latent Semantic Analysis. In SIAM International Conference on Data Mining (SDM), 2011. \endverbatim In the future we hope to implement the rest of the V1 algorithms, like NMF, BPTF, etc. - \ref NMF "Non-negative matrix factorization" \verbatim NMF Lee, D.D., and Seung, H.S., (2001), 'Algorithms for Non-negative Matrix Factorization', Adv. Neural Info. Proc. Syst. 13, 556-562. \endverbatim - \ref SVD "Restarted Lanczos algorithm" \verbatim V. Hernández, J. E. Román and A. Tomás. STR-8: Restarted Lanczos Bidiagonalization for the SVD in SLEPc. \endverbatim - \ref ADPREDICTOR "Adpredictor" \verbatim Graepel, Thore, et al. "Web-scale bayesian click-through rate prediction for sponsored search advertising in microsoft's bing search engine." Proceedings of the 27th International Conference on Machine Learning (ICML-10). 2010. 
\endverbatim \section Input Input The input to the GraphLab v2.1 collaborative filtering toolkit should be prepared inside a directory. All files in the directory will be read in parallel by GraphLab. Each file has the following text format: \verbatim [ user ] [ item ] [ rating] \n \endverbatim Namely, each row holds one rating. user and item are unsigned integers, and the rating is a double value. user and item do not have to be consecutive integers. Here are some allowed inputs: \verbatim 1000 2 5.0 3 7 12.0 6 2 2.1 \endverbatim There are three types of input files read from your input directory path: *.predict - test file *.validate - validation file all other files - are training files. Training files are the historic recommendations that the linear model is built from. Validation files are historic recommendations put aside, not used for training, but for the validation of the model. Test files are user/item pairs to compute recommendations as learned by the trained model. Note: for weighted-ALS, the input has the following format: \verbatim [user] [item] [weight] [rating] \n \endverbatim Since each rating has its associated weight. \section RATINGS Computing ratings (test predictions) Optionally, you can compute ratings for user/item pairs using the computed linear model. The prediction output filename is specified by: --predictions=filename . If the --prediction command line is not used, the output is not saved. Additionally you need to prepare a file named somefilename.predict inside of your training folder, with user item pairs in the following format: \verbatim [user] [item]\n \endverbatim The program computes the prediction based on the computed linear models for every user item pair. The output format for the prediction is: \verbatim [user] [item] [rating]\n \endverbatim \section Output Output The linear model is saved to the files: filename.U_X_of_Y and filename.V_X_of_Y Where X is the part number and Y is the total number of parts. 
By default there are two parts. U and V' are the matrices whose product U*V' approximates the matrix A. Output format for the linear model matrix U is: \verbatim user factor1 factor2 .. factorN \n \endverbatim Output format for the linear model matrix V is: \verbatim item factor1 factor2 .. factorN \n \endverbatim It is possible to merge the files together using the cat command. For example: \verbatim > cat filename.U_2_of_2 >> filename.U_1_of_2 \endverbatim will append the contents of part two of the matrix U into part one. It is further possible to sort the output using the user or item id using the sort command. \verbatim > sort -g -k 1,1 filename.U_1_of_2 > filename.U.sorted \endverbatim That way each row will contain one user (for matrix U) or one item (for matrix V) feature vector in sorted order. For bias-SGD and SVD++, two additional files are created: filename.bias.U and filename.bias.V with the biases. NOTE: Output files are NOT sorted. Use the sort Linux command to sort them by user / item id. For example: \verbatim sort -g -k 1,1 filename.U > filename.U.sorted # sorts user features, by the first column (user id) \endverbatim \section ALS ALS ALS (Alternating least squares) Pros: Simple to use, not many command line arguments Cons: intermediate accuracy, higher computational overhead ALS is a simple yet powerful algorithm. In this model the prediction is computed as: r_ui = p_u * q_i Where r_ui is a scalar rating of user u to item i, and p_u is the user feature vector of size D, q_i is the item feature vector of size D and the product is a vector product. The output of ALS is two matrices: filename.U and filename.V. The matrix U holds the user feature vectors in each row. (Each vector has exactly D columns). The matrix V holds the feature vectors for each item (Each vector has again exactly D columns). In linear algebra notation the rating matrix R ~ UV Below are ALS related command line options: \verbatim --D=XX Set D the feature vector width. 
High width results in higher accuracy but slower execution time. Typical values are 20 - 100. --lambda=XX Set regularization. Regularization helps to prevent overfitting. --max_iter=XX The number of iterations. --maxval=XX Maximum allowed rating --minval=XX Min allowed rating --predictions=XX File name to write prediction to. Note that you will need a user/item pair input file named something.predict to enable predictions (see section: ratings). \endverbatim And here is an exmaple ALS run: \li Download the files: smallnetflix_mm.train and smallnetflix_mm.validate and save them inside a directory called smallnetflix/. \li Run: \verbatim bickson@thrust:~/graphlab2.1/graphlabapi/debug/toolkits/collaborative_filtering$ ./als smallnetflix/ --max_iter=5 --lambda=0.065 --ncpus=8 --minval=1 --maxval=5 TCP Communication layer constructed. Loading graph. INFO: distributed_graph.hpp(load_from_posixfs:1743): Loading graph from file: smallnetflix/smallnetflix_mm.train INFO: distributed_graph.hpp(load_from_posixfs:1743): Loading graph from file: smallnetflix/smallnetflix_mm.validate Loading graph. Finished in 20.4732 Finalizing graph. INFO: distributed_ingress_base.hpp(finalize:165): Finalizing Graph... INFO: distributed_ingress_base.hpp(exchange_global_info:489): Graph info: nverts: 97266 nedges: 3843340 nreplicas: 97266 replication factor: 1 Finalizing graph. 
Finished in 4.72823 ========== Graph statistics on proc 0 =============== Num vertices: 97266 Num edges: 3843340 Num replica: 97266 Replica to vertex ratio: 1 -------------------------------------------- Num local own vertices: 97266 Num local vertices: 97266 Replica to own ratio: 1 Num local edges: 3843340 Edge balance ratio: 1 Creating engine Running ALS INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 93705 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 17.8 2.99666 5.76023 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 1 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 33 2.07403 3.99939 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 2 INFO: synchronous_engine.hpp(start:1257): Active vertices: 93702 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 50.9 0.896588 1.76014 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 3 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 66.1 0.755783 1.45845 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 4 INFO: synchronous_engine.hpp(start:1257): Active vertices: 93547 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 83.9 0.697824 1.35614 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 5 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 99.1 0.677978 1.33864 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 6 INFO: synchronous_engine.hpp(start:1257): Active vertices: 91661 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 116.9 0.666121 1.3114 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 7 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3560 
INFO: synchronous_engine.hpp(start:1307): Running Aggregators 132 0.657644 1.31769 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 8 INFO: synchronous_engine.hpp(start:1257): Active vertices: 90443 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 149.7 0.651672 1.3017 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 9 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 ... \endverbatim \section CCD_PLUS_PLUS "CCD++" ALS (Alternating least squares) with parallel coordinate descent Pros: Simple to use, not many command line arguments, faster than ALS Cons: less accurate than ALS CCD++ is a recent improvement of ALS which uses parallel coordinate descent using a single dimension at a time and thus avoids the need of costly linear algebra operations (especially when the dimension of the feature vector is high). In this model the prediction is computed as: r_ui = p_u * q_i Where r_ui is a scalar rating of user u to item i, and p_u is the user feature vector of size D, q_i is the item feature vector of size D and the product is a vector product. The output of ALS is two matrices: filename.U and filename.V. The matrix U holds the user feature vectors in each row. (Each vector has exactly D columns). The matrix V holds the feature vectors for each time (Each vector has again exactly D columns). In linear algebra notation the rating matrix R ~ UV Below are CCD++ related command line options: \verbatim --D=XX Set D the feature vector width. High width results in higher accuracy but slower execution time. Typical values are 20 - 100. --lambda=XX Set regularization. Regularization helps to prevent overfitting. --max_iter=XX The number of iterations. --maxval=XX Maximum allowed rating --minval=XX Min allowed rating --predictions=XX File name to write prediction to. Note that you will need a user/item pair input file named something.predict to enable predictions (see section: ratings). 
\endverbatim \section SGD "Stochastic gradient descent (SGD)" Pros: fast method Cons: need to tune step size, more iterations are needed relative to ALS. SGD is a simple gradient descent algorithm. Prediction in SGD is done as in ALS: r_ui = p_u * q_i Where r_ui is a scalar rating of user u to item i, and p_u is the user feature vector of size D, q_i is the item feature vector of size D and the product is a vector product. The output of ALS is two matrices: filename.U and filename.V. The matrix U holds the user feature vectors in each row. (Each vector has exactly D columns). The matrix V holds the feature vectors for each time (Each vector has again exactly D columns). In linear algebra notation the rating matrix R ~ UV \verbatim --gamma=XX Gradient descent step size --lambda=XX Gradient descent regularization --step_dec=XX Multiplicative step decrease. Should be between 0.1 to 1. Default is 0.9. --D=X Feature vector width. Common values are 20 - 150. --max_iter=XX Max number of iterations --maxval=XX Maximum allowed rating --minval=XX Min allowed rating --predictions=XX File name to write prediction to. Note that you will need a user/item pair input file named something.predict to enable predictions (see section: ratings). --tol=XX Stop computation when absolute error of prediction is less than tolerance. Default is 1e-3. \endverbatim Here is an example SGD run on small Netflix data: \li Download the files: smallnetflix_mm.train and smallnetflix_mm.validate and save them inside a directory called smallnetflix/. \li Run: \verbatim bickson@thrust:~/graphlab2.1/graphlabapi/debug/toolkits/collaborative_filtering$ ./sgd smallnetflix --ncpus=8 --prediction=out --max_iter=10 --gamma=1e-3 --lambda=5e-4 --step_dec=0.9999 --minval=1 --maxval=5 TCP Communication layer constructed. Loading graph. 
INFO: distributed_graph.hpp(load_from_posixfs:1743): Loading graph from file: smallnetflix/smallnetflix_mm.train INFO: distributed_graph.hpp(load_from_posixfs:1743): Loading graph from file: smallnetflix/smallnetflix_mm.validate Loading graph. Finished in 8.13307 Finalizing graph. INFO: distributed_ingress_base.hpp(finalize:165): Finalizing Graph... INFO: distributed_ingress_base.hpp(exchange_global_info:489): Graph info: nverts: 97266 nedges: 3843340 nreplicas: 97266 replication factor: 1 Finalizing graph. Finished in 4.71821 ========== Graph statistics on proc 0 =============== Num vertices: 97266 Num edges: 3843340 Num replica: 97266 Replica to vertex ratio: 1 -------------------------------------------- Num local own vertices: 97266 Num local vertices: 97266 Replica to own ratio: 1 Num local edges: 3843340 Edge balance ratio: 1 Creating engine WARNING: distributed_aggregator.hpp(test_vertex_mapper_type:344): Vertex Map Function does not pass strict runtime type checks. Function prototype should be ReductionType f(icontext_type&, const vertex_type&) If you are not intentionally violating the abstraction, we recommend fixing your function for safety reasons Running SGD (C) Code by Danny Bickson, CMU Please send bug reports to danny.bickson@gmail.com Time Training Validation RMSE RMSE INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 93705 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 6.2 3.36002 3.49087 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 1 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 15.2 2.08215 2.49183 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 3 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 24.6 1.91162 2.05136 INFO: synchronous_engine.hpp(start:1213): 0: 
Starting iteration: 5 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 33.7 1.77294 1.80171 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 7 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 42.6 1.74585 1.68424 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 9 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 51.7 1.63199 1.56293 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 11 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 60.7 1.58655 1.50337 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 13 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 69.8 1.48326 1.4251 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 15 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 78.8 1.43588 1.38834 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 17 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 87.8 1.34333 1.33439 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 19 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators ---------------------------------------------------------- Final Runtime (seconds): 90.064 Updates executed: 972660 Update Rate (updates/second): 10799.7 Final error: 91.7 2.04374 1.87504 Saving predictions \endverbatim \section BIAS_SGD BIAS-SGD Pros: fast method Cons: need to tune step size Bias-SGD is a simple gradient descent algorithm, where 
besides of the feature vector we also compute item and user biases (how much their average rating differs from the global average). Prediction in bias-SGD is done as follows: r_ui = global_mean_rating + b_u + b_i + p_u * q_i Where global_mean_rating is the global mean rating, b_u is the bias of user u, b_i is the bias of item i and p_u and q_i are feature vectors as in ALS. You can read more about bias-SGD in reference [N]. The output of bias-SGD consists of two matrices: filename.U and filename.V. The matrix U holds the user feature vectors in each row. (Each vector has exactly D columns). The matrix V holds the feature vectors for each time (Each vector has again exactly D columns). Additionally, the output consists of two vectors: bias for each user, bias for each item. Last, the global mean rating is also given as output. \verbatim --gamma=XX Gradient descent step size --lambda=XX Gradient descent regularization --step_dec=XX Multiplicative step decrease. Should be between 0.1 to 1. Default is 0.9 --D=X Feature vector width. Common values are 20 - 150. --max_iter=XX Max number of iterations --maxval=XX Maximum allowed rating --minval=XX Min allowed rating --predictions=XX File name to write prediction to. Note that you will need a user/item pair input file named something.predict to enable predictions (see section: ratings). \endverbatim Example for running bias-SGD \li Download the files: smallnetflix_mm.train and smallnetflix_mm.validate and save them inside a directory called smallnetflix/. \li Run: \verbatim ibickson@thrust:~/graphlab2.1/graphlabapi/debug/toolkits/collaborative_filtering$ ./biassgd smallnetflix --ncpus=8 --prediction=out --max_iter=10 TCP Communication layer constructed. Loading graph. INFO: distributed_graph.hpp(load_from_posixfs:1743): Loading graph from file: smallnetflix/smallnetflix_mm.train INFO: distributed_graph.hpp(load_from_posixfs:1743): Loading graph from file: smallnetflix/smallnetflix_mm.validate Loading graph. 
Finished in 7.59514 Finalizing graph. INFO: distributed_ingress_base.hpp(finalize:165): Finalizing Graph... INFO: distributed_ingress_base.hpp(exchange_global_info:489): Graph info: nverts: 97266 nedges: 3843340 nreplicas: 97266 replication factor: 1 Finalizing graph. Finished in 4.93781 ========== Graph statistics on proc 0 =============== Num vertices: 97266 Num edges: 3843340 Num replica: 97266 Replica to vertex ratio: 1 -------------------------------------------- Num local own vertices: 97266 Num local vertices: 97266 Replica to own ratio: 1 Num local edges: 3843340 Edge balance ratio: 1 Creating engine Global mean is: 3.5992 WARNING: distributed_aggregator.hpp(test_vertex_mapper_type:344): Vertex Map Function does not pass strict runtime type checks. Function prototype should be ReductionType f(icontext_type&, const vertex_type&) If you are not intentionally violating the abstraction, we recommend fixing your function for safety reasons Running Bias-SGD (C) Code by Danny Bickson, CMU Please send bug reports to danny.bickson@gmail.com Time Training Validation RMSE RMSE INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 93705 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 7.1 1.13985 1.15723 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 1 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 17.5 1.03638 1.07782 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 3 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 27.9 1.00508 1.05466 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 5 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 38.3 0.987878 1.04218 INFO: synchronous_engine.hpp(start:1213): 0: 
Starting iteration: 7 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 48.8 0.976675 1.03377 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 9 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 59.1 0.968729 1.02827 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 11 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 69.5 0.962782 1.0236 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 13 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 80 0.958178 1.02056 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 15 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 90.3 0.95442 1.01745 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 17 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 100.6 0.95139 1.01548 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 19 INFO: synchronous_engine.hpp(start:1257): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1307): Running Aggregators ---------------------------------------------------------- Final Runtime (seconds): 102.971 Updates executed: 972660 Update Rate (updates/second): 9445.96 Final error: 104.7 1.04346 1.13552 Saving predictions \endverbatim \section SVD_PLUS_PLUS SVD++ Pros: more accurate method than SGD once tuned, relatively fast method Cons: a lot of parameters for tuning, prone to numerical errors when parameters are out of scope. Koren SVD++ is an algorithm which is slightly more fancy than bias-SGD and gives somewhat better prediction results. 
Basic configuration --svdpp_step_dec=XX Multiplicative step decrement (between 0.1 to 1). Default is 0.9 \verbatim --item_bias_step=XX Item bias step size --item_bias_reg=XX Item bias regularization --user_bias_step=XX User bias step size --user_bias_reg=XX User bias regularization --user_fctr_step=XX User factor step size --user_fctr_reg=XX User factor regularization --item_fctr_step=XX Item factor step size --item_fctr_reg=XX Item factor regularization --item_fctr2_step=XX Item factor2 step size --item_fctr2_reg=XX Item factor2 regularization --D=X Feature vector width. Common values are 20 - 150. --step_dec=XX Multiplicative step decrease. Should be between 0.1 to 1. Default is 0.9 --max_iter=XX Max number of iterations --maxval=XX Maximum allowed rating --minval=XX Min allowed rating --predictions=XX File name to write prediction to. Note that you will need a user/item pair input file named something.predict to enable predictions (see section: ratings). \endverbatim Prediction in Koren’s SVD++ algorithm is computed as follows: \verbatim r_ui = global_mean_rating + b_u + b_i + q_u * ( p_i + w_i ) \endverbatim Where r_ui is the scalar rating for user u to item i, global_mean_rating is the global mean rating, b_u is a scalar bias for user u, b_i is a scalar bias for item i, q_u is a feature vectors of length D for user u, p_i is a feature vector of length D for item i, and w_i is an additional feature vector of length D (the weight). The product is a vector product. The output of Koren’s SVD++ is 5 output files: \verbatim Global mean ratings - include the scalar global mean rating. user_bias - includes a vector with bias for each user movie_bias - includes a vector with bias for each movie matrix U - includes in each row the feature vector q_u of size D. matrix V - includes in each row the sum of feature vectors p_i + w_i of size D. 
\endverbatim \section WALS Weighted-ALS Pros: Simple to use, allows iteration of weights which can be thought of confidence in the recommendation Cons: intermediate accuracy, higher computational overhead Weighted-ALS is a simple yet powerful algorithm. In this model the prediction is computed as: r_ui = p_u * q_i Where r_ui is a scalar rating of user u to item i, and p_u is the user feature vector of size D, q_i is the item feature vector of size D and the product is a vector product. The output of ALS is two matrices: filename.U and filename.V. The matrix U holds the user feature vectors in each row. (Each vector has exactly D columns). The matrix V holds the feature vectors for each time (Each vector has again exactly D columns). In linear algebra notation the rating matrix R ~ UV Below are WALS related command line options: \verbatim --D=XX Set D the feature vector width. High width results in higher accuracy but slower execution time. Typical values are 20 - 100. --lambda=XX Set regularization. Regularization helps to prevent overfitting. --max_iter=XX The number of iterations. --maxval=XX Maximum allowed rating --minval=XX Min allowed rating --predictions=XX File name to write prediction to. Note that you will need a user/item pair input file named something.predict to enable predictions (see section: ratings). \endverbatim And here is an exmaple WALS run: \li Download the files: time_smallnetflix.train and time_smallnetflix.validate and save them inside a directory called timenetflix/. \li Run: \verbatim bickson@thrust:~/graphlab2.1/graphlabapi/debug/toolkits/collaborative_filtering$ ./wals smallnetflix/ --max_iter=5 --lambda=0.065 --ncpus=8 bickson@thrust:~/graphlab2.1/graphlabapi/debug/toolkits/collaborative_filtering$ ./wals timenetflix/ TCP Communication layer constructed. Loading graph. 
INFO: distributed_graph.hpp(load_from_posixfs:1823): Loading graph from file: timenetflix/time_smallnetflix.train INFO: distributed_graph.hpp(load_from_posixfs:1823): Loading graph from file: timenetflix/time_smallnetflix.validate Loading graph. Finished in 8.69352 Finalizing graph. INFO: distributed_ingress_base.hpp(finalize:166): Finalizing Graph... INFO: distributed_ingress_base.hpp(exchange_global_info:493): Graph info: nverts: 97266 nedges: 3843340 nreplicas: 97266 replication factor: 1 Finalizing graph. Finished in 5.2593 ========== Graph statistics on proc 0 =============== Num vertices: 97266 Num edges: 3843340 Num replica: 97266 Replica to vertex ratio: 1 -------------------------------------------- Num local own vertices: 97266 Num local vertices: 97266 Replica to own ratio: 1 Num local edges: 3843340 Edge balance ratio: 1 Creating engine Running Weighted-ALS INFO: synchronous_engine.hpp(start:1260): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1309): Active vertices: 93705 INFO: synchronous_engine.hpp(start:1358): Running Aggregators 64.8 24.3996 67.9795 INFO: synchronous_engine.hpp(start:1260): 0: Starting iteration: 1 INFO: synchronous_engine.hpp(start:1309): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1358): Running Aggregators 119.2 18.866 75.5397 INFO: synchronous_engine.hpp(start:1260): 0: Starting iteration: 2 INFO: synchronous_engine.hpp(start:1309): Active vertices: 93704 INFO: synchronous_engine.hpp(start:1358): Running Aggregators 184 10.6131 43.162 INFO: synchronous_engine.hpp(start:1260): 0: Starting iteration: 3 INFO: synchronous_engine.hpp(start:1309): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1358): Running Aggregators 238.5 8.49288 28.1034 INFO: synchronous_engine.hpp(start:1260): 0: Starting iteration: 4 INFO: synchronous_engine.hpp(start:1309): Active vertices: 93702 INFO: synchronous_engine.hpp(start:1358): Running Aggregators 303 7.24041 22.2866 INFO: synchronous_engine.hpp(start:1260): 0: 
Starting iteration: 5 INFO: synchronous_engine.hpp(start:1309): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1358): Running Aggregators 357.6 6.74309 19.951 INFO: synchronous_engine.hpp(start:1260): 0: Starting iteration: 6 INFO: synchronous_engine.hpp(start:1309): Active vertices: 93585 INFO: synchronous_engine.hpp(start:1358): Running Aggregators 422.1 6.42464 19.061 INFO: synchronous_engine.hpp(start:1260): 0: Starting iteration: 7 INFO: synchronous_engine.hpp(start:1309): Active vertices: 3561 INFO: synchronous_engine.hpp(start:1358): Running Aggregators 476.5 6.25972 17.2991 INFO: synchronous_engine.hpp(start:1260): 0: Starting iteration: 8 INFO: synchronous_engine.hpp(start:1309): Active vertices: 93164 INFO: synchronous_engine.hpp(start:1358): Running Aggregators 542.3 6.13234 16.8022 INFO: synchronous_engine.hpp(start:1260): 0: Starting iteration: 9 INFO: synchronous_engine.hpp(start:1309): Active vertices: 3561 ... \endverbatim \section SALS Sparse-ALS Pros: Generate sparse factor matrices, that can be clustered into similar user/item groups Cons: less accurate linear model because of the sparsification step This algorithm is based on ALS, but an additional sparsifying step is performed on either the user feature vectors, the item feature vectors or both. This algorithm is useful for spectral clustering: first the rating matrix is factorized into a product of one or two sparse matrices, and then clustering can be computed on the feature matrices to detect similar users or items. The underlying algorithm which is used for sparsifying is CoSaMP. See reference on the top of this page. Below are sparse-ALS related command line options: \verbatim --user_sparsity=XX A number between 0.5 to 1 which defines how sparse is the resulting user feature factor matrix --movie_sparsity=XX A number between 0.5 to 1 which defines how sparse is the resulting movie feature factor matrix --algorithm=XX An integer between 1 to 3 which defines the run mode. 
1 = SPARSE_USR_FACTOR 2 = SPARSE_ITM_FACTOR 3 = SPARSE_BOTH_FACTORS \endverbatim Prediction in sparse-ALS is computed like in ALS. \section NMF "Non-negative matrix factorization" Non-negative matrix factorization (NMF) is based on Lee and Seung [reference H]. Prediction is computed like in ALS: r_ui = p_u * q_i Namely the scalar prediction r of user u is composed of the vector product of the user feature vector p_u (of size D), with the item feature vector q_i (of size D). The only difference is that both p_u and q_i have all nonnegative values. The output of NMF is two matrices: filename.U and filename.V. The matrix U holds the user feature vectors in each row. (Each vector has exactly D columns). The matrix V holds the feature vectors for each time (Each vector has again exactly D columns). In linear algebra notation the rating matrix R ~ UV, U>=0, V>=0. \subsection "NMF cost function and properties" Unlike many of the other methods who is Euclidean distance, NMF cost function is: KL( UV’ || A) Namely the KL divergence between the approximating product UV’ and the original matrix A. The objective is not computed in GraphLab, but you can easily compute it in Matlab if needed. NMF is a gradient descent type algorithm which is supposed to always converge. However it may converge to a local minima. The algorithm starts from a random solution and that is why different runs may converge to different solution. For debugging, if you are interested in verifying that multiple runs converge to the same point, use the flag --debug=true when running. \section SVD Restarted Lanczos Iteration (SVD) SVD is implemented using the restarted lanczos algorithm. The input is a sparse matrix market format input file. The output are 3 files: one file containing the singular values, and two dense matrix market format files containing the matrices U and V. Note: for larger models, it is advised to use svd_onesided since it significantly saved memory. 
Here is an example Matrix Market input file for the matrix A2: <235|0>bickson@bigbro6:~/ygraphlab/graphlabapi/debug/toolkits/parsers$ cat A2 \verbatim 0 0 0.8147236863931789 0 1 0.9133758561390194 0 2 0.2784982188670484 0 3 0.9648885351992765 1 0 0.9057919370756192 1 1 0.6323592462254095 1 2 0.5468815192049838 1 3 0.1576130816775483 2 0 0.1269868162935061 2 1 0.09754040499940952 2 2 0.9575068354342976 2 3 0.9705927817606157 \endverbatim Ceate a directory named A2, and inside it put the file A2. Here is an for running SVD (using one mpi node, one core) \verbatim bickson@thrust:~/graphlab2.1/graphlabapi/debug/toolkits/collaborative_filtering$ ./svd A2 --rows=3 --cols=4 --nsv=3 --nv=4 --max_iter=3 --quiet=1 --save_vectors=1 --predictions=out TCP Communication layer constructed. Loading graph. Loading graph. Finished in 0.004996 Finalizing graph. Finalizing graph. Finished in 0.374135 ========== Graph statistics on proc 0 =============== Num vertices: 7 Num edges: 12 Num replica: 7 Replica to vertex ratio: 1 -------------------------------------------- Num local own vertices: 7 Num local vertices: 7 Replica to own ratio: 1 Num local edges: 12 Edge balance ratio: 1 Creating engine Running SVD (gklanczos) (C) Code by Danny Bickson, CMU Please send bug reports to danny.bickson@gmail.com set status to tol Number of computed signular values 4 Singular value 0 2.16097 Error estimate: 1.05039e-15 Singular value 1 0.97902 Error estimate: 1.32491e-15 Singular value 2 0.554159 Error estimate: 9.92283e-16 Singular value 3 1.05388e-64 Error estimate: 3.42194e-16 ---------------------------------------------------------- Final Runtime (seconds): 0.54851 Updates executed: 59 Update Rate (updates/second): 107.564 \endverbatim For running with multiple mpi nodes run: \verbatim mpiexec -n XX ./svd [ rest of the command line aguments ] \endverbatim \subsection Command line arguments \verbatim --training Input file directory. --nv Buffer size of vectors. 
Typically the buffer size should be greater than the number of singular values you look for. When nv is higher, accuracy will be higher, but running time slower. --nsv Number of singular values requested. Should be typically less than --nv --ortho_repeats Number of repeats on the orthogonalization step. Default is 1 (no repeats). Increase this number for higher accuracy but slower execution. Maximal allowed values is 3. --max_iter Number of allowed restarts. The minimum is 2= no restart. When max_iter is higher, the result will be more accurate, but the running slower. --save_vectors=true Save the factorized matrices U and V to file. --predictions=XX File name to write prediction to. Note that you will need a user/item pair input file named something.predict to enable predictions (see section: test ratings). --tol Convergence threshold. For large matrices set this number set this number higher (for example 1e-1, while for small matrices you can set it to 1e-16). As smaller the convergence threshold execution is slower. \endverbatim --input_file_offset - for 1 based array index, use 1, for 0 based array index use 0. (Namely, array first index starts from 1 or 0). Note: for improving accuracy tol should be reduced. max_iter and nv should be increased. \subsection SVD0 "SVD Output" On default, the singular values will be written to an output file. When using --save_vectors=1 the singular vectors of the matrices U and V will be written into file as well. Here is an example of the output files created by the A2 example: \verbatim -rw-r--r-- 1 bickson staff 136 Nov 17 14:19 outsingular_values -rw-r--r-- 1 bickson staff 353 Nov 17 14:19 out.V_1_of_1 -rw-r--r-- 1 bickson staff 244 Nov 17 14:19 out.U_1_of_1 \endverbatim
The singular_values file has a straightforward format: \verbatim %%GraphLab SVD Solver library. This file contains the singular values. 2.160971174556 0.9790200922132 0.5541592674291 1.69699593375e-64 \endverbatim Now let's compate GraphLba's output to Matlab execution: \verbatim >> A2 ans = 0.8147 0.9134 0.2785 0.9649 0.9058 0.6324 0.5469 0.1576 0.1270 0.0975 0.9575 0.9706 >>[u,d,v] = svd(A2) u = -0.7019 0.2772 0.6561 -0.5018 0.4613 -0.7317 -0.5055 -0.8428 -0.1847 d = 2.1610 0 0 0 0.9790 0 0 0 0.5542 v = -0.5047 0.5481 -0.2737 -0.4663 0.4726 0.2139 -0.4414 -0.4878 -0.7115 -0.5770 -0.4882 0.6108 \endverbatim And here is GraphLab output: \verbatim #> cat out.U_1_of_1 1 -0.70192004675202879 -0.27716662376092144 -0.6561150132717597 -5.4738221262688167e-48 2 -0.50180137502007927 -0.46130533561664677 0.73170538289640219 0 3 -0.50547366696553819 0.84283809240244056 0.18471686983009383 -1.3684555315672042e-48 #> cat out.V_1_of_1 1 -0.50467448230910661 -0.54813128066725625 0.27370653410216472 -1.0321593257419978e-64 2 -0.46633647481132878 -0.47257174529014068 -0.21394855212543565 1.2159686437710111e-64 3 -0.44142369037529217 0.48778719783126245 0.71152275587696578 4.1893513895261429e-65 4 -0.57704339935747928 0.4881513027120063 -0.61077501430760717 -4.0044466804328635e-65 #> cat out.singular_values %%GraphLab SVD Solver library. This file contains the singular values. 2.160971174556 0.9790200922132 0.5541592674291 1.69699593375e-64 \endverbatim \subsection SVD1 "Understanding the error measure" Following Slepc, the error measure is computed by a combination of: sqrt( ||Av_i - sigma(i) u_i ||_2^2 + ||A^Tu_i - sigma(i) V_i ||_2^2 ) / sigma(i) Namely, the deviation of the approximation sigma(i) u_i from Av_i , and vice versa. \subsection SVD2 "Scalability" Currently the code was tested with up to 3.5 billion non-zeros on a 24 core machine. Each Lanczos iteration takes about 30 seconds. 
\subsection SVD3 "Difference to Mahout" Mahout SVD solver is implemented using the same Lanczos algorithm. However, there are several differences 1) In Mahout there are no restarts, so quality of the solution deteriorates very rapidly, after 5-10 iterations the solution is no longer accurate. Running without restarts can be done using our solution with the --max_iter=2 flag. 2) In Mahout there is a single orthonornalization step in each iteration while in our implementation there are two (after computation of u_i and after v_i ). 3) In Mahout there is no error estimation while we provide for each singular value the approximated error. 4) Our solution is typically x100 times faster than Mahout. \section ADPREDICTOR Adpredictor In a nutshell, AdPredictor computes a linear regression model with probit link function. The input to the algorithm are observations of the type \verbatim -1 3:1 4:1 6:1 9:1 1 4:1 5:1 18:1 19:1 ... \endverbatim where the first field -1 is the action (did not click) or 1 (clicked). Next there are pairs of binary features. The output of the algorithm are weights for each feature. When a new ad comes in, we should simply sum up the weights for the matching features. If the weights are smaller than zero then the prediction is -1 and vice versa. Adpredictor takes file in libsvm format. You should prepare a sub folder with the training file and validation (file needs to end with .validate). You can run adpredictor using the command: \verbatim ./adpredictor --matrix=folder/ --max_iter=10 --beta=1 \endverbatim \section Implicit "Implicit Ratings" Implicit rating handles the case where we have only positive examples (for example when a user bought a certain product) but we never have indication when a user DID NOT buy another product. The following paper \verbatim Pan, Yunhong Zhou, Bin Cao, Nathan N. Liu, Rajan Lukose, Martin Scholz, and Qiang Yang. 2008. One-Class Collaborative Filtering. 
In Proceedings of the 2008 Eighth IEEE International Conference on Data Mining (ICDM '08). IEEE Computer Society, Washington, DC, USA, 502-511. \endverbatim proposes to add negative examples at random for unobserved user/item pairs. Implicit rating is implemented in the collaborative filtering library and can be used with any of the algorithms explained above. \verbatim --implicitratingtype=1 Adds implicit ratings at random --implicitratingpercentage A number between 1e-8 to 0.8 which determines what is the percentage of edges to add to the sparse model. 0 means none while 1 means fully dense model. --implicitratingvalue The value of the rating added. On default it is zero, but you can change it. --implicitratingweight Weight of the implicit rating (for WALS) OR Time of the explicit rating (for tensor algorithms) --users - the number of users. Note that users have to have consecutive ids between 1 and users. --items - the number of items. Note that items have to have consecutive ids between 1 and items. \endverbatim Example for adding implicit ratings: \verbatim ./als --matrix=smallnetflix/ --users=95526 --items=3561 --implicitratingtype=1 --implicitratingpercentage=0.01 TCP Communication layer constructed. Loading graph. INFO: distributed_graph.hpp(load_from_posixfs:1823): Loading graph from file: smallnetflix/smallnetflix_mm.train INFO: distributed_graph.hpp(load_from_posixfs:1823): Loading graph from file: smallnetflix/smallnetflix_mm.validate Loading graph. Finished in 1.17598 Going to add: 3401680 implicit edges. users: 95526 items: 3561 Finished adding 3401680 implicit edges. Finalizing graph. INFO: distributed_ingress_base.hpp(finalize:166): Finalizing Graph... ^C \endverbatim \section Acknowledgements Acknowledgements \li Liang Xiong, CMU for providing the Matlab code of BPTF, numerous discussions and infinite support!! Thanks!! \li Timmy Wilson, Smarttypes.org for providing twitter network snapshot example, and Python scripts for reading the output. 
\li Sanmi Koyejo, from the University of Texas at Austin, for providing Python scripts for preparing the inputs.
\li Dan Brickley, from VU University Amsterdam, for helping debugging installation and prepare the input in Octave.
\li Nicholas Ampazis, University of the Aegean, for providing his SVD++ source code.
\li Yehuda Koren, Yahoo! Research, for providing his SVD++ source code implementation.
\li Marinka Zitnik, University of Ljubljana, Slovenia, for helping debugging ALS and suggesting NMF algos to implement.
\li Joel Welling from Pittsburgh Supercomputing Center, for optimizing GraphLab on BlackLight supercomputer and simplifying installation procedure.
\li Sagar Soni from Gujarat Technological University and Hasmukh Goswami College of Engineering for helping testing the code.
\li Young Cha, UCLA for testing the code.
\li Mohit Singh for helping improve documentation.
\li Nicholas Kolegraff for testing our examples.
\li Theo Throuillon, Ecole Nationale Superieure d'Informatique et de Mathematiques Appliquees de Grenoble for debugging NMF.
\li Qiang Yan, Chinese Academy of Science for providing time-svd++, bias-SVD, RBM and LIBFM code that the Graphlab version is based on.
\li Ramakrishnan Kannan, Georgia Tech, for helping debugging and simplifying usage.
\li Charles Martin, GLG, for debugging NMF.
\li Alex Hasha, bundle.com for improving SGD and bias-SGD documentation and usability.
\li Zhao Yu (Jason Chao), douban.com, for identifying SGD/bias-SGD bugs.
*/
================================================ FILE: toolkits/collaborative_filtering/cosamp.hpp ================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * * Code written by Danny Bickson, CMU * Any changes to the code must include this original license notice in full. * This file implements the shooting algorithm for solving Lasso problem */ #ifndef _COSAMP_HPP #define _COSAMP_HPP ivec sort_union(ivec a, ivec b){ ivec ab = concat(a,b); sort(ab); for (int i=1; i< ab.size(); i++){ if (ab[i] == ab[i-1]) del(ab,i); } return ab; } vec CoSaMP(const mat & Phi, const vec & u, int K, int max_iter, double tol1, int D){ assert(K<= 2*D); assert(K>=1); assert(Phi.rows() == Phi.cols()); assert(Phi.rows() == D); assert(u.size() == D); vec Sest = zeros(D); vec utrue = Sest; vec v = u; int t=1; ivec T2; while (t #include /** * \brief Use the GraphLab serialization macros to add a save routine * to the Eigen::VectorXd object. */ BEGIN_OUT_OF_PLACE_SAVE(arc, Eigen::VectorXd, vec) { typedef Eigen::VectorXd::Index index_type; typedef Eigen::VectorXd::Scalar scalar_type; const index_type size = vec.size(); arc << size; graphlab::serialize(arc, vec.data(), size * sizeof(scalar_type)); } END_OUT_OF_PLACE_SAVE() /** * \brief Use the GraphLab serialization macros to add a load routine * to the Eigen::VectorXd object. */ BEGIN_OUT_OF_PLACE_LOAD(arc, Eigen::VectorXd, vec) { typedef Eigen::VectorXd::Index index_type; typedef Eigen::VectorXd::Scalar scalar_type; index_type size = 0; arc >> size; vec.resize(size); graphlab::deserialize(arc, vec.data(), size * sizeof(scalar_type)); } END_OUT_OF_PLACE_LOAD() /** * \brief Use the GraphLab serialization macros to add a save routine * to the Eigen::MatrixXd object. 
*/ BEGIN_OUT_OF_PLACE_SAVE(arc, Eigen::MatrixXd, mat) { typedef Eigen::MatrixXd::Index index_type; typedef Eigen::MatrixXd::Scalar scalar_type; const index_type rows = mat.rows(); const index_type cols = mat.cols(); arc << rows << cols; graphlab::serialize(arc, mat.data(), rows*cols*sizeof(scalar_type)); } END_OUT_OF_PLACE_SAVE() /** * \brief Use the GraphLab serialization macros to add a load routine * to the Eigen::MatrixXd object. */ BEGIN_OUT_OF_PLACE_LOAD(arc, Eigen::MatrixXd, mat) { typedef Eigen::MatrixXd::Index index_type; typedef Eigen::MatrixXd::Scalar scalar_type; index_type rows=0, cols=0; arc >> rows >> cols; mat.resize(rows,cols); graphlab::deserialize(arc, mat.data(), rows*cols*sizeof(scalar_type)); } END_OUT_OF_PLACE_LOAD() #endif ================================================ FILE: toolkits/collaborative_filtering/eigen_wrapper.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * Code by Danny Bickson, CMU */ #ifndef EIGEN_WRAPPER #define EIGEN_WRAPPER /** * SET OF WRAPPER FUNCTIONS FOR EIGEN * * */ #include #include #include #include "Eigen/Dense" #define EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET #include "Eigen/Sparse" #include "Eigen/Cholesky" #include "Eigen/Eigenvalues" #include "Eigen/SVD" #define EIGEN_DONT_PARALLELIZE //eigen parallel for loop interfers with ours. 
// ---------------------------------------------------------------------------
// it++-style convenience typedefs over Eigen types, used throughout the
// collaborative filtering toolkit.
// ---------------------------------------------------------------------------
using namespace Eigen;
typedef MatrixXd mat;   // dense double matrix
typedef VectorXd vec;   // dense double column vector
typedef VectorXi ivec;  // dense int vector
typedef MatrixXi imat;  // dense int matrix
// NOTE(review): the two typedefs below lost their template argument lists
// (text inside angle brackets was stripped during extraction) and will not
// compile as written — restore the parameters from the upstream
// eigen_wrapper.hpp before building.
typedef Matrix matst;
typedef SparseVector sparse_vec;

// Print up to `len` entries of `_vec` to stdout, prefixed by `name`.
// Zero entries print as a bare " 0 " for readability.
inline void debug_print_vec(const char * name,const vec& _vec, int len){
  printf("%s ) ", name);
  for (int i=0; i< len; i++)
    if (_vec[i] == 0) printf("    0    ");
    else printf("%12.4g    ", _vec[i]);
  printf("\n");
}

// Overload of the above for a raw double array.
inline void debug_print_vec(const char * name,const double* _vec, int len){
  printf("%s ) ", name);
  for (int i=0; i< len; i++)
    if (_vec[i] == 0) printf("    0    ");
    else printf("%12.4g    ", _vec[i]);
  printf("\n");
}

// Forward declaration; defined elsewhere in the toolkit.
mat randn1(int dx, int dy, int col);

// NOTE(review): `template` lost its parameter list (presumably
// <typename data>) in extraction — restore before building.
template inline void set_val(mat &A, int row, int col, data val){ A(row, col) = val; }

// Element / row / column accessors (it++-compatible names).
inline double get_val(const mat &A, int row, int col){ return A(row, col); }
inline int get_val(const imat &A, int row, int col){ return A(row, col); }
inline vec get_col(const mat& A, int col){ return A.col(col); }
inline vec get_row(const mat& A, int row){ return A.row(row); }
inline void set_col(mat& A, int col, const vec & val){ A.col(col) = val; }
inline void set_row(mat& A, int row, const vec & val){ A.row(row) = val; }

// size x size identity matrix.
inline mat eye(int size){ return mat::Identity(size, size); }
// All-ones vector of length `size`.
inline vec ones(int size){ return vec::Ones(size); }

// Copy `size` doubles out of a raw array into a fresh vec.
inline vec init_vec(const double * array, int size){
  vec ret(size);
  memcpy(ret.data(), array, size*sizeof(double));
  return ret;
}

// Parse a whitespace/semicolon-separated string of row*col numbers into a
// dense matrix, read in row-major order.
// NOTE(review): `string` longer than the 2056-byte buffer overflows `buf`
// (strcpy is unchecked) — callers must pass short literals only.
inline mat init_mat(const char * string, int row, int col){
  mat out(row, col);
  char buf[2056];
  strcpy(buf, string);
  char *pch = strtok(buf," \r\n\t;");
  for (int i=0; i< row; i++){
    for (int j=0; j< col; j++){
      out(i,j) = atof(pch);
      pch = strtok (NULL, " \r\n\t;");
    }
  }
  return out;
}

// Integer-matrix variant of init_mat (atol instead of atof).
// NOTE(review): same unchecked strcpy/buffer caveat as init_mat.
inline imat init_imat(const char * string, int row, int col){
  imat out(row, col);
  char buf[2056];
  strcpy(buf, string);
  char *pch = strtok(buf," \r\n\t;");
  for (int i=0; i< row; i++){
    for (int j=0; j< col; j++){
      out(i,j) = atol(pch);
      pch = strtok (NULL, " \r\n\t;");
    }
  }
  return out;
}

// Parse a separated string of exactly `size` numbers into a vec.
inline vec init_vec(const char * string, int size){
vec out(size);
  char buf[2056];
  strcpy(buf, string);
  char *pch = strtok (buf," \r\n\t;");
  int i=0;
  while (pch != NULL)
  {
    out(i) =atof(pch);
    pch = strtok (NULL, " \r\n\t;");
    i++;
  }
  // Token count must match the caller-declared length exactly.
  assert(i == size);
  return out;
}

// Alias kept for it++ source compatibility.
inline vec init_dbl_vec(const char * string, int size){ return init_vec(string, size); }

// Zero-filled vector / matrix constructors.
inline vec zeros(int size){ return vec::Zero(size); }
inline mat zeros(int rows, int cols){ return mat::Zero(rows, cols); }

// First `num` entries of v.
inline vec head(const vec& v, int num){ return v.head(num); }
// `num` entries starting at `start`, clamped so the segment stays inside v.
inline vec mid(const vec&v, int start, int num){ return v.segment(start, std::min(num, (int)(v.size()-start))); }
// Last `num` entries of v.
inline vec tail(const vec&v, int num){ return v.segment(v.size() - num, num); }
inline ivec head(const ivec& v, int num){ return v.head(num); }

// In-place ascending sort over the raw storage.
inline void sort(ivec &a){ std::sort(a.data(), a.data()+a.size()); }
inline void sort(vec & a){ std::sort(a.data(), a.data()+a.size()); }

// Return the permutation that sorts `a` ascending (matlab-style sort index).
// NOTE(review): extraction damage — the template arguments of the
// std::vector/std::pair declarations and the text between the two loop
// headers and the next definition were stripped (sort_index's tail and the
// `template<>` header of sumsum are fused together below). This will not
// compile as written; restore from the upstream eigen_wrapper.hpp.
inline ivec sort_index(const vec&a){
  ivec ret(a.size());
  std::vector > D;
  // D.reserve(a.size());
  for (int i=0;i(a.coeff(i),i));
  std::sort(D.begin(),D.end());
  for (int i=0;i inline double sumsum(const mat & A){ return A.sum(); }

// Norm helper.
// NOTE(review): the `pow` argument is ignored and the SQUARED Frobenius norm
// is returned for any pow — confirm against upstream whether this is intended.
inline double norm(const mat &A, int pow=2){ return A.squaredNorm(); }

// Matrix inverse; no invertibility check, and the two-argument form always
// reports success.
inline mat inv(const mat&A){ return A.inverse(); }
inline bool inv(const mat&A, mat &out){ out = A.inverse(); return true; }

// Outer product a * b'.
inline mat outer_product(const vec&a, const vec&b){ return a*b.transpose(); }

//Eigen does not sort eigenvalues, as done in matlab
inline bool eig_sym(const mat & T, vec & eigenvalues, mat & eigenvectors){ // //Column of the returned matrix is an eigenvector corresponding to eigenvalue number as returned by eigenvalues(). The eigenvectors are normalized to have (Euclidean) norm equal to one.
// NOTE(review): SelfAdjointEigenSolver lost its <mat> template argument in
// extraction; restore before building.
  SelfAdjointEigenSolver solver(T);
  eigenvectors = solver.eigenvectors();
  eigenvalues = solver.eigenvalues();
  // Reorder eigenpairs so eigenvalues come out descending, matlab-style:
  // permute the eigenvector columns by the sort index, then reverse values.
  ivec index = sort_index(eigenvalues);
  sort(eigenvalues);
  vec eigenvalues2 = eigenvalues.reverse();
  mat T2 = zeros(eigenvectors.rows(), eigenvectors.cols());
  for (int i=0; i< eigenvectors.cols(); i++){
    set_col(T2, index[i], get_col(eigenvectors, i));
  }
  eigenvectors = T2;
  eigenvalues = eigenvalues2;
  return true;
}

// Element-wise product of two vectors.
// NOTE(review): extraction damage — the loop body and at least one random
// helper between "i" and "(from,to)" were stripped (likely randu / the
// vector-returning randi overload used elsewhere as ::randi(1,from,to)).
// Will not compile as written; restore from upstream eigen_wrapper.hpp.
inline vec elem_mult(const vec&a, const vec&b){
  vec ret = a;
  for (int i=0; i(from,to); return ret; }

// Uniform random integer in [from, to].
// NOTE(review): internal::random lost its <int> template argument in
// extraction.
inline int randi(int from, int to){ return internal::random(from,to); }

// Concatenate two int vectors.
inline ivec concat(const ivec&a, const ivec&b){ ivec ret(a.size()+b.size()); ret << a,b; return ret; }

// Remove entry i from a by shifting the tail left one slot and shrinking.
inline void del(ivec&a, int i){
  memcpy(a.data()+i, a.data() + i+1, (a.size() - i - 1)*sizeof(int));
  a.conservativeResize(a.size() - 1); //resize without deleting values!
}

// Gather the columns of A listed in `cols` into a new matrix.
inline mat get_cols(const mat&A, ivec & cols){
  mat a(A.rows(), cols.size());
  for (int i=0; i< cols.size(); i++)
    set_col(a, i, get_col(A, cols[i]));
  return a;
}

// Copy the half-open column range [start_col, end_col) of A.
// NOTE(review): BUG — the loop copies columns 0..end_col-start_col-1
// (get_col(A, i)) instead of start_col..end_col-1; it should read
// get_col(A, start_col + i). Left unchanged here; fix upstream.
inline mat get_cols(const mat&A, int start_col, int end_col){
  assert(end_col > start_col);
  assert(end_col <= A.cols());
  assert(start_col >= 0);
  mat a(A.rows(), end_col-start_col);
  for (int i=0; i< end_col-start_col; i++)
    set_col(a, i, get_col(A, i));
  return a;
}

inline void set_val(vec & v, int pos, double val){ v(pos) = val; }
// Dot product.
inline double dot(const vec&a, const vec& b){ return a.dot(b); }
// Reversed copies.
inline vec reverse(vec& a){ return a.reverse(); }
inline ivec reverse(ivec& a){ return a.reverse(); }
// Raw storage accessors.
inline const double * data(const mat &A){ return A.data(); }
inline const int * data(const imat &A){ return A.data(); }
inline const double * data(const vec &v){ return v.data(); }

// Minimal binary file reader/writer mimicking the it++ it_file API.
class it_file{
  std::fstream fb; // underlying stream, opened by the constructor
public:
  // Open `name` for reading if it exists; otherwise create it for writing.
  // The probe open/close pair detects existence; fail state is cleared
  // before reopening in truncate mode. Aborts (assert) if neither works.
  it_file(const char * name){
    fb.open(name, std::fstream::in);
    fb.close();
    if (fb.fail()){
      fb.clear(std::fstream::failbit);
      fb.open(name, std::fstream::out | std::fstream::trunc );
    } else {
      fb.open(name, std::fstream::in);
    }
    if (!fb.is_open()){
perror("Failed opening file "); printf("filename is: %s\n", name); assert(false); } }; std::fstream & operator<<(const std::string str){ int size = str.size(); fb.write((char*)&size, sizeof(int)); assert(!fb.fail()); fb.write(str.c_str(), size); return fb; } std::fstream &operator<<(mat & A){ int rows = A.rows(), cols = A.cols(); fb.write( (const char*)&rows, sizeof(int)); fb.write( (const char *)&cols, sizeof(int)); for (int i=0; i< A.rows(); i++) for (int j=0; j< A. cols(); j++){ double val = A(i,j); fb.write( (const char *)&val, sizeof(double)); assert(!fb.fail()); } return fb; } std::fstream &operator<<(const vec & v){ int size = v.size(); fb.write( (const char*)&size, sizeof(int)); assert(!fb.fail()); for (int i=0; i< v.size(); i++){ double val = v(i); fb.write( (const char *)&val, sizeof(double)); assert(!fb.fail()); } return fb; } std::fstream & operator<<(const double &v){ fb.write((const char*)&v, sizeof(double)); return fb; } std::fstream & operator>>(std::string str){ int size = -1; fb.read((char*)&size, sizeof(int)); if (fb.fail() || fb.eof()){ perror("Failed reading file"); assert(false); } char buf[256]; fb.read(buf, std::min(256,size)); assert(!fb.fail()); assert(!strncmp(str.c_str(), buf, std::min(256,size))); return fb; } std::fstream &operator>>(mat & A){ int rows, cols; fb.read( (char *)&rows, sizeof(int)); assert(!fb.fail()); fb.read( (char *)&cols, sizeof(int)); assert(!fb.fail()); A = mat(rows, cols); double val; for (int i=0; i< A.rows(); i++) for (int j=0; j< A. 
cols(); j++){ fb.read((char*)&val, sizeof(double)); assert(!fb.fail()); A(i,j) = val; } return fb; } std::fstream &operator>>(vec & v){ int size; fb.read((char*)&size, sizeof(int)); assert(!fb.fail()); assert(size >0); v = vec(size); double val; for (int i=0; i< v.size(); i++){ fb.read((char*)& val, sizeof(double)); assert(!fb.fail()); v(i) = val; } return fb; } std::fstream &operator>>(double &v){ fb.read((char*)&v, sizeof(double)); assert(!fb.fail()); return fb; } void close(){ fb.close(); } }; #define Name(a) std::string(a) inline void set_size(sparse_vec &v, int size){ //did not find a way to declare vector dimension, yet } inline void set_new(sparse_vec&v, int ind, double val){ v.insert(ind) = val; } inline int nnz(sparse_vec& v){ return v.nonZeros(); } inline int get_nz_index(sparse_vec &v, sparse_vec::InnerIterator& i){ return i.index(); } inline double get_nz_data(sparse_vec &v, sparse_vec::InnerIterator& i){ return i.value(); } #define FOR_ITERATOR(i,v) \ for (sparse_vec::InnerIterator i(v); i; ++i) template inline double sum_sqr(const T& a); template<> inline double sum_sqr(const vec & a){ vec ret = a.array().pow(2); return ret.sum(); } template<> inline double sum_sqr(const sparse_vec & a){ double sum=0; FOR_ITERATOR(i,a){ sum+= powf(i.value(),2); } return sum; } inline double trace(const mat & a){ return a.trace(); } inline double get_nz_data(sparse_vec &v, int i){ assert(nnz(v) > i); int cnt=0; FOR_ITERATOR(j, v){ if (cnt == i){ return j.value(); } cnt++; } return 0.0; } inline void print(sparse_vec & vec){ int cnt = 0; FOR_ITERATOR(i, vec){ std::cout<= 20) break; } std::cout< svdEigen(A, Eigen::ComputeFullU | Eigen::ComputeFullV); U= svdEigen.matrixU(); V= svdEigen.matrixV(); singular_values =svdEigen.singularValues(); } #endif ================================================ FILE: toolkits/collaborative_filtering/implicit.hpp ================================================ #ifndef _IMPLICIT_HPP__ #define _IMPLICIT_HPP__ /** * @file * @author Danny 
Bickson * @version 1.0 * * @section LICENSE * * Copyright [2012] [Carnegie Mellon University] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * header file for handling the addition of implicit edges */ #include "eigen_wrapper.hpp" #include "stats.hpp" enum{ IMPLICIT_RATING_DISABLED = 0, IMPLICIT_RATING_RANDOM = 1 }; double implicitratingweight; double implicitratingvalue; double implicitratingpercentage; int implicitratingtype; template uint add_implicit_edges4(int type, graph_type & graph, graphlab::distributed_control & dc){ switch(type){ case IMPLICIT_RATING_DISABLED: return 0; case IMPLICIT_RATING_RANDOM: break; default: assert(false); }; uint added = 0; size_t M = info.max_user; size_t N = info.max_item; uint toadd = implicitratingpercentage*N*M; dc.cout()<<"Going to add: " << toadd << " implicit edges. users: " << M << " items: " << N << std::endl; assert(toadd >= 1); for (uint j=0; j< toadd; j++){ ivec item = ::randi(1,0,N-1); ivec user = ::randi(1,0,M-1); graph.add_edge(user[0], -(graphlab::vertex_id_type(item[0] + SAFE_NEG_OFFSET)), als_edge_type(implicitratingvalue, edge_data::TRAIN, implicitratingweight)); added++; } dc.cout()<<"Finished adding " << toadd << " implicit edges. 
" << std::endl; return added; }; template uint add_implicit_edges(int type, graph_type & graph, graphlab::distributed_control & dc){ switch(type){ case IMPLICIT_RATING_DISABLED: return 0; case IMPLICIT_RATING_RANDOM: break; default: assert(false); }; uint added = 0; size_t M = info.max_user; size_t N = info.max_item; uint toadd = implicitratingpercentage*N*M; dc.cout()<<"Going to add: " << toadd << " implicit edges. users: " << M << " items: " << N <= 1); for (uint j=0; j< toadd; j++){ ivec item = ::randi(1,0,N-1); ivec user = ::randi(1,0,M-1); graph.add_edge(user[0], -(graphlab::vertex_id_type(item[0] + SAFE_NEG_OFFSET)), als_edge_type(implicitratingvalue)); added++; } dc.cout()<<"Finished adding " << toadd << " implicit edges. " << std::endl; return added; }; void parse_implicit_command_line(graphlab::command_line_options & clopts){ clopts.attach_option("implicitratingweight", implicitratingweight,"implicit rating weight"); clopts.attach_option("implicitratingvalue", implicitratingvalue, "implicit rating value"); clopts.attach_option("implicitratingtype", implicitratingtype, "implicit rating type (-=disabled, 1=random)"); if (implicitratingtype != IMPLICIT_RATING_RANDOM && implicitratingtype != IMPLICIT_RATING_DISABLED) logstream(LOG_FATAL)<<"Implicit rating type should be either 0 (IMPLICIT_RATING_DISABLED) or 1 (IMPLICIT_RATING_RANDOM)" << std::endl; clopts.attach_option("implicitratingpercentage", implicitratingpercentage, "implicit rating percentage (1e-8,0.8)"); if (implicitratingpercentage < 1e-8 && implicitratingpercentage > 0.8) logstream(LOG_FATAL)<<"Implicit rating percentage should be (1e-8, 0.8)" << std::endl; clopts.attach_option("users", info.max_user, "max user id (for implicit ratings)"); clopts.attach_option("items", info.max_item, "max item id (for implicit ratings)"); } #endif //_IMPLICIT_HPP__ ================================================ FILE: toolkits/collaborative_filtering/make_synthetic_als_data.cpp 
================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include #include typedef Eigen::VectorXd vec_type; typedef Eigen::MatrixXd mat_type; #include #include int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "Creates a folder with synthetic training data"; graphlab::command_line_options clopts(description, false); std::string output_folder = "synthetic_data"; size_t nfiles = 5; size_t D = 20; size_t nusers = 1000; size_t nmovies = 10000; size_t nvalidate = 2; size_t npredict = 1; double noise = 0.1; double stdev = 2; double alpha = 1.8; clopts.attach_option("dir", output_folder, "Location to create the data files"); clopts.attach_option("nfiles", nfiles, "The number of files to generate."); clopts.attach_option("D", D, "Number of latent dimensions."); clopts.attach_option("nusers", nusers, "The number of users."); clopts.attach_option("nmovies", nmovies, "The number of movies."); clopts.attach_option("alpha", alpha, "The power-law constant."); clopts.attach_option("nvalidate", nvalidate, "The validate ratings pers user"); clopts.attach_option("npredict", npredict, "The predict ratings pers user"); clopts.attach_option("noise", noise, "The standard deviation noise parameter"); 
clopts.attach_option("stdev", stdev, "The standard deviation in latent factor values"); if(!clopts.parse(argc, argv)) { std::cout << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } std::cout << "Creating data directory: " << output_folder << std::endl; boost::filesystem::path directory(output_folder); if(!boost::filesystem::create_directory(output_folder)) { logstream(LOG_ERROR) << "Error creating directory: " << directory << std::endl; return EXIT_FAILURE; } std::cout << "Opening files:" << std::endl; std::vector< std::ofstream* > train_files(nfiles); std::vector< std::ofstream* > validate_files(nfiles); std::vector< std::ofstream* > predict_files(nfiles); for(size_t i = 0; i < nfiles; ++i) { const std::string train_fname = output_folder + "/graph_" + graphlab::tostr(i) + ".tsv"; train_files[i] = new std::ofstream(train_fname.c_str()); if(!train_files[i]->good()) { logstream(LOG_ERROR) << "Error creating file: " << train_fname; } const std::string validate_fname = output_folder + "/graph_" + graphlab::tostr(i) + ".tsv.validate"; validate_files[i] = new std::ofstream(validate_fname.c_str()); if(!validate_files[i]->good()){ logstream(LOG_ERROR) << "Error creating file: " << train_fname; } const std::string predict_fname = output_folder + "/graph_" + graphlab::tostr(i) + ".tsv.predict"; predict_files[i] = new std::ofstream(predict_fname.c_str()); if(!predict_files[i]->good()){ logstream(LOG_ERROR) << "Error creating file: " << train_fname; } } // Make synthetic latent factors std::vector< vec_type > user_factors(nusers); std::vector< vec_type > movie_factors(nmovies); // Create a shared random number generator graphlab::random::generator gen; gen.seed(31413); std::cout << "Constructing latent user factors" << std::endl; foreach(vec_type& factor, user_factors) { factor.resize(D); // Randomize the factor for(size_t d = 0; d < D; ++d) factor(d) = gen.gaussian(0, stdev); } std::cout << "Constructing latent movie factors" << std::endl; 
foreach(vec_type& factor, movie_factors) { factor.resize(D); // Randomize the factor for(size_t d = 0; d < D; ++d) factor(d) = gen.gaussian(0, stdev); } size_t nedges_train = 0; ASSERT_GT(nusers, nvalidate + npredict); // Make power-law probability vector std::vector prob(nusers - nvalidate - npredict); for(size_t i = 0; i < prob.size(); ++i) prob[i] = std::pow(double(i+1), -alpha); graphlab::random::pdf2cdf(prob); for(size_t movie_id = 0, user_id = 0; movie_id < nmovies; ++movie_id) { // Add power-law out degree ratings const size_t out_degree = gen.multinomial_cdf(prob) + 1; for(size_t i = 0; i < out_degree; ++i) { user_id = (user_id + 2654435761) % nusers; const size_t file_id = user_id % nfiles; const double rating = user_factors[user_id].dot(movie_factors[movie_id]); *(train_files[file_id]) << user_id << '\t' << (movie_id + nusers) << '\t' << rating << '\n'; nedges_train++; } // Add a few extra validate ratings for(size_t i = 0; i < nvalidate; ++i) { user_id = (user_id + 2654435761) % nusers; const size_t file_id = user_id % nfiles; const double rating = user_factors[user_id].dot(movie_factors[movie_id]); *(validate_files[file_id]) << user_id << '\t' << (movie_id + nusers) << '\t' << rating << '\n'; } // Add a few extra predict ratings for(size_t i = 0; i < npredict; ++i) { user_id = (user_id + 2654435761) % nusers; const size_t file_id = user_id % nfiles; *(predict_files[file_id]) << user_id << '\t' << (movie_id + nusers) << '\n'; } } // end of loop over movies for(size_t i = 0; i < nfiles; ++i) { train_files[i]->close(); delete train_files[i]; train_files[i] = NULL; validate_files[i]->close(); delete validate_files[i]; validate_files[i] = NULL; predict_files[i]->close(); delete predict_files[i]; predict_files[i] = NULL; } std::cout << "Created " << nedges_train << " training edges." 
<< std::endl; } // end of main ================================================ FILE: toolkits/collaborative_filtering/math.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef _MATH_HPP #define _MATH_HPP #include "types.hpp" #include "graphlab.hpp" #include "graphlab/util/tracepoint.hpp" DECLARE_TRACER(Axbtrace); DECLARE_TRACER(Axbtrace2); DECLARE_TRACER(vecequals); DECLARE_TRACER(orthogonalize_vs_alltrace); DECLARE_TRACER(als_lapack_trace); DECLARE_TRACER(orth1); DECLARE_TRACER(orth2); DECLARE_TRACER(orth3); double regularization = 0; bool debug; bool regnormal; void print_vec(const char * name, const vec & pvec, bool high); struct math_info{ //for Axb operation int increment; double c; double d; int x_offset, b_offset , y_offset, r_offset, div_offset, prev_offset, mat_offset, vec_offset; int orthogonalization; bool A_offset, A_transpose; std::vector names; bool use_diag; int ortho_repeats; int start, end; bool update_function; //for backslash operation bool dist_sliced_mat_backslash; mat eDT; double maxval, minval; math_info(){ reset_offsets(); } void reset_offsets(){ increment = 2; c=1.0; d=0.0; x_offset = b_offset = y_offset = r_offset = div_offset = prev_offset = mat_offset = vec_offset = -1; A_offset = false; A_transpose = false; use_diag = true; start = end = -1; update_function = false; dist_sliced_mat_backslash = false; 
orthogonalization = 0; } int increment_offset(){ return increment++; } }; bipartite_graph_descriptor info; math_info mi; class DistMat; class DistDouble; class DistSlicedMat; DistSlicedMat * curMat = NULL; gather_type alphas; gather_type sum_alpha; #define MAX_PRINT_ITEMS 25 double runtime = 0; using namespace graphlab; vec curvec; /*** * UPDATE FUNCTION (ROWS) */ class Axb : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { float last_change; public: /* Gather the weighted rank of the adjacent page */ double gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { if (edge.data().role == edge_data::PREDICT) return 0; bool brows = vertex.id() < (uint)info.get_start_node(false); if (info.is_square()) brows = !mi.A_transpose; if (mi.A_offset && mi.x_offset >= 0){ double val = edge.data().obs * (brows ? edge.target().data().pvec[mi.x_offset] : edge.source().data().pvec[mi.x_offset]); //printf("gather edge on vertex %d val %lg obs %lg\n", vertex.id(), val, edge.data().obs); return val; } //printf("edge on vertex %d val %lg\n", vertex.id(), 0.0); return 0; } /* Use the total rank of adjacent pages to update this page */ void apply(icontext_type& context, vertex_type& vertex, const double& total) { //printf("Entered apply on node %d value %lg\n", vertex.id(), total); vertex_data & user = vertex.data(); assert(mi.x_offset >=0 || mi.y_offset >= 0); assert(mi.r_offset >=0); /* perform orthogonalization of current vector */ if (mi.orthogonalization){ for (int i=mi.mat_offset; i< mi.vec_offset; i++){ vertex.data().pvec[mi.vec_offset] -= alphas.pvec[i-mi.mat_offset] * vertex.data().pvec[i]; } return; } double val = total; //assert(total != 0 || mi.y_offset >= 0); //store previous value for convergence detection if (mi.prev_offset >= 0) user.pvec[mi.prev_offset ] = user.pvec[mi.r_offset]; assert(mi.x_offset >=0 || mi.y_offset>=0); if (mi.A_offset && mi.x_offset >= 0){ if (info.is_square() && mi.use_diag)// add the diagonal term val += 
(/*mi.c**/ (user.A_ii+ regularization) * user.pvec[mi.x_offset]); //printf("node %d added diag term: %lg\n", vertex.id(), user.A_ii); val *= mi.c; } /***** COMPUTE r = c*I*x *****/ else if (!mi.A_offset && mi.x_offset >= 0){ val = mi.c*user.pvec[mi.x_offset]; } /**** COMPUTE r+= d*y (optional) ***/ if (mi.y_offset>= 0){ val += mi.d*user.pvec[mi.y_offset]; } /***** compute r = (... ) / div */ if (mi.div_offset >= 0){ val /= user.pvec[mi.div_offset]; } user.pvec[mi.r_offset] = val; //printf("Exit apply on node %d value %lg\n", vertex.id(), val); } edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { if (vertex.id() < rows) return OUT_EDGES; else return IN_EDGES; } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return NO_EDGES; } /* The scatter function just signal adjacent pages */ //void scatter(icontext_type& context, const vertex_type& vertex, // edge_type& edge) const { //} }; void init_lanczos_mapr( graph_type::vertex_type& vertex) { assert(actual_vector_len > 0); vertex.data().pvec = zeros(actual_vector_len); } void init_math(graph_type * _pgraph, bipartite_graph_descriptor & _info, double ortho_repeats = 3, bool update_function = false){ pgraph = _pgraph; info = _info; mi.reset_offsets(); mi.update_function = update_function; mi.ortho_repeats = ortho_repeats; } class DistVec{ public: int offset; //real location in memory int display_offset; //offset to print out int prev_offset; std::string name; //optional bool transpose; bipartite_graph_descriptor info; int start; int end; void init(){ start = info.get_start_node(!transpose); end = info.get_end_node(!transpose); assert(start < end && start >= 0 && end >= 1); //debug_print(name); }; int size(){ return end-start; } DistVec(const bipartite_graph_descriptor &_info, int _offset, bool _transpose, const std::string & _name){ offset = _offset; display_offset = _offset; name = _name; info = _info; transpose = _transpose; prev_offset = -1; 
init(); } DistVec(const bipartite_graph_descriptor &_info, int _offset, bool _transpose, const std::string & _name, int _prev_offset){ offset = _offset; display_offset = _offset; name = _name; info = _info; transpose = _transpose; assert(_prev_offset < data_size); prev_offset = _prev_offset; init(); } DistVec& operator-(){ mi.d=-1.0; return *this; } DistVec& operator-(const DistVec & other){ mi.x_offset = offset; mi.y_offset = other.offset; transpose = other.transpose; if (mi.d == 0) mi.d = -1.0; else mi.d*=-1.0; return *this; } DistVec& operator+(){ if (mi.d == 0) mi.d=1.0; return *this; } DistVec& orthogonalize(){ mi.orthogonalization = 1; return *this; } DistVec& operator+(const DistVec &other){ mi.x_offset =offset; mi.y_offset = other.offset; transpose = other.transpose; return *this; } DistVec& operator+(const DistMat &other); DistVec& operator-(const DistMat &other); DistVec& operator/(const DistVec &other){ mi.div_offset = other.offset; return *this; } DistVec& operator/(const DistDouble & other); DistVec& operator/(double val){ assert(val != 0); assert(mi.d == 0); mi.d = 1/val; return *this; } DistVec& operator=(const DistVec & vec); DistVec& operator=(const vec & pvec); vec to_vec(int dmax = -1, int doffset = -1); void debug_print(const char * name){ if (debug){ std::cout<to_vec(MAX_PRINT_ITEMS, mi.r_offset == -1? 
offset:mi.r_offset); for (int i=0; i< pvec.size(); i++){ //TODO printf("%.5lg ", fabs(pgraph->vertex_data(i).pvec[(mi.r_offset==-1)?offset:mi.r_offset])); printf("%.5lg ", fabs(pvec[i])); } printf("\n"); } } void debug_print(std::string name){ return debug_print(name.c_str());} double operator[](int i){ assert(i < end - start); assert(false); // TODO return pgraph->vertex_data(i+start).pvec[offset]; } DistDouble operator*(const DistVec & other); DistVec& operator*(const double val){ assert(val!= 0); mi.d=val; return *this; } DistVec& operator*(const DistDouble &dval); DistMat &operator*(DistMat & v); DistVec& _transpose() { /*if (!config.square){ start = n; end = m+n; }*/ return *this; } DistVec& operator=(DistMat &mat); }; DistVec * pcurrent = NULL; class DistSlicedMat{ public: bipartite_graph_descriptor info; int start_offset; int end_offset; std::string name; //optional int start; int end; bool transpose; DistSlicedMat(int _start_offset, int _end_offset, bool _transpose, const bipartite_graph_descriptor &_info, std::string _name){ assert(_start_offset < _end_offset); assert(_start_offset >= 0); assert(_info.total() > 0); transpose = _transpose; info = _info; init(); start_offset = _start_offset; end_offset = _end_offset; name = _name; } DistSlicedMat& operator=(DistMat & other); void init(){ start = info.get_start_node(!transpose); end = info.get_end_node(!transpose); assert(start < end && start >= 0 && end >= 1); //debug_print(name); }; int size(int dim){ return (dim == 1) ? 
(end-start) : (end_offset - start_offset) ; } void set_cols(int start_col, int end_col, const mat& pmat){ assert(start_col >= 0); assert(end_col <= end_offset - start_offset); assert(pmat.rows() == end-start); assert(pmat.cols() >= end_col - start_col); for (int i=start_col; i< end_col; i++) this->operator[](i) = get_col(pmat, i-start_col); } mat get_cols(int start_col, int end_col){ assert(start_col < end_offset - start_offset); assert(start_offset + end_col <= end_offset); mat retmat = zeros(end-start, end_col - start_col); for (int i=start_col; i< end_col; i++) set_col(retmat, i-start_col, this->operator[](i-start_col).to_vec()); return retmat; } void operator=(mat & pmat){ assert(end_offset-start_offset <= pmat.cols()); assert(end-start == pmat.rows()); set_cols(0, pmat.cols(), pmat); } std::string get_name(int pos){ assert(pos < end_offset - start_offset); assert(pos >= 0); return name; } DistVec operator[](int pos){ assert(pos < end_offset-start_offset); assert(pos >= 0); DistVec ret(info, start_offset + pos, transpose, get_name(pos)); ret.display_offset = pos; return ret; } }; void assign_vec(graph_type::vertex_type & vertex){ if (!info.is_square()) assert(vertex.id() - pcurrent->start >= 0 && vertex.id() - pcurrent->start < curvec.size()); vertex.data().pvec[pcurrent->offset] = curvec[vertex.id() - pcurrent->start]; } gather_type output_vector(const graph_type::vertex_type & vertex){ assert(pcurrent && pcurrent->offset >= 0 && pcurrent->offset < vertex.data().pvec.size()); gather_type ret; assert(pcurrent->end - pcurrent->start > 0); assert(vertex.id() - pcurrent->start >= 0); ret.pvec = vec::Zero(pcurrent->end - pcurrent->start); ret.pvec[vertex.id() - pcurrent->start] = vertex.data().pvec[pcurrent->offset]; return ret; } bool select_in_range(const graph_type::vertex_type & vertex){ return vertex.id() >= (uint)pcurrent->start && vertex.id() < (uint)pcurrent->end; } DistVec& DistVec::operator=(const DistVec & vec){ assert(offset < (info.is_square() ? 
2*data_size: data_size)); if (mi.x_offset == -1 && mi.y_offset == -1){ mi.y_offset = vec.offset; } mi.r_offset = offset; assert(prev_offset < data_size); mi.prev_offset = prev_offset; if (mi.d == 0.0) mi.d=1.0; transpose = vec.transpose; end = vec.end; start = vec.start; mi.start = start; mi.end = end; INITIALIZE_TRACER(Axbtrace2, "Update function Axb"); BEGIN_TRACEPOINT(Axbtrace2); pcurrent = (DistVec*)&vec; start_engine(); debug_print(name); mi.reset_offsets(); return *this; } DistVec& DistVec::operator=(const vec & pvec){ assert(offset >= 0); assert(pvec.size() == info.num_nodes(true) || pvec.size() == info.num_nodes(false)); assert(start < end); if (!info.is_square() && pvec.size() == info.num_nodes(false)){ transpose = true; } else { transpose = false; } //#pragma omp parallel for INITIALIZE_TRACER(vecequals, "vector assignment"); BEGIN_TRACEPOINT(vecequals); //for (int i=start; i< end; i++){ // pgraph->vertex_data(i).pvec[offset] = pvec[i-start]; //} pcurrent = this; curvec = pvec; graphlab::vertex_set nodes = pgraph->select(select_in_range); pgraph->transform_vertices(assign_vec, nodes); END_TRACEPOINT(vecequals); debug_print(name); return *this; } vec DistVec::to_vec(int dmax, int doffset){ pcurrent = this; if (doffset >= 0) pcurrent->offset = doffset; if (dmax >= 0) pcurrent->end = std::min(pcurrent->start + dmax, pcurrent->end); graphlab::vertex_set nodes = pgraph->select(select_in_range); // for (int i=start; i< end; i++){ // //TODO ret[i-start] = pgraph->vertex_data(i).pvec[offset]; // } gather_type curvec = pgraph->map_reduce_vertices(output_vector, nodes); return curvec.pvec; } /* * wrapper for computing r = c*A*x+d*b*y */ class DistMat{ public: bool transpose; bipartite_graph_descriptor info; DistMat(const bipartite_graph_descriptor& _info) { info = _info; transpose = false; }; DistMat &operator*(const DistVec & v){ mi.x_offset = v.offset; mi.A_offset = true; //v.transpose = transpose; //r_offset = A_offset; return *this; } DistMat &operator*(const 
DistDouble &d); DistMat &operator-(){ mi.c=-1.0; return *this; } DistMat &operator/(const DistVec & v){ mi.div_offset = v.offset; return *this; } DistMat &operator+(){ mi.c=1.0; return *this; } DistMat &operator+(const DistVec &v){ mi.y_offset = v.offset; if (mi.d == 0.0) mi.d=1.0; return *this; } DistMat &operator-(const DistVec &v){ mi.y_offset = v.offset; if (mi.d == 0.0) mi.d=-1.0; else mi.d*=-1.0; return *this; } DistMat & _transpose(){ transpose = true; mi.A_transpose = true; return *this; } DistMat & operator~(){ return _transpose(); } DistMat & backslash(DistSlicedMat & U){ mi.dist_sliced_mat_backslash = true; transpose = U.transpose; return *this; } void set_use_diag(bool use){ mi.use_diag = use; } }; DistVec& DistVec::operator=(DistMat &mat){ mi.r_offset = offset; assert(prev_offset < data_size); mi.prev_offset = prev_offset; transpose = mat.transpose; mi.start = info.get_start_node(!transpose); mi.end = info.get_end_node(!transpose); INITIALIZE_TRACER(Axbtrace, "Axb update function"); BEGIN_TRACEPOINT(Axbtrace); pcurrent = this; int old_start = start; int old_end = end; start = mi.start; end = mi.end; start_engine(); start = old_start; end = old_end; END_TRACEPOINT(Axbtrace); debug_print(name); mi.reset_offsets(); mat.transpose = false; return *this; } DistVec& DistVec::operator+(const DistMat &other){ mi.y_offset = offset; transpose = other.transpose; return *this; } DistVec& DistVec::operator-(const DistMat & other){ mi.y_offset = offset; transpose = other.transpose; if (mi.c == 0) mi.c = -1; else mi.c *= -1; return *this; } DistMat& DistVec::operator*(DistMat & v){ mi.x_offset = offset; mi.A_offset = true; return v; } class DistDouble{ public: double val; std::string name; DistDouble() {}; DistDouble(double _val) : val(_val) {}; DistVec& operator*(DistVec & dval){ mi.d=val; return dval; } DistMat& operator*(DistMat & mat){ mi.c = val; return mat; } DistDouble operator/(const DistDouble dval){ DistDouble mval; mval.val = val / dval.val; return mval; } 
bool operator<(const double other){ return val < other; } DistDouble & operator=(const DistDouble & other){ val = other.val; debug_print(name); return *this; } bool operator==(const double _val){ return val == _val; } void debug_print(const char * name){ std::cout<=0 && mi.b_offset >= 0); double val = 0; for (int i=start; i< end; i++){ assert(false);//not yet //TODO const vertex_data * data = &pgraph->vertex_data(i); //TODO double * pv = (double*)&data->pvec[0]; //TODO val += mi.d* pv[mi.y_offset] * pv[mi.b_offset]; } mi.reset_offsets(); DistDouble mval; mval.val = val; return mval; } DistVec& DistVec::operator*(const DistDouble &dval){ mi.d = dval.val; return *this; } int size(DistMat & A, int pos){ assert(pos == 1 || pos == 2); return A.info.num_nodes(!A.transpose); } DistMat &DistMat::operator*(const DistDouble &d){ mi.c = d.val; return *this; } DistDouble sqrt(DistDouble & dval){ DistDouble mval; mval.val=sqrt(dval.val); return mval; } gather_type calc_norm(const graph_type::vertex_type & vertex){ gather_type ret; assert(pcurrent && pcurrent->offset < vertex.data().pvec.size()); ret.training_rmse = pow(vertex.data().pvec[pcurrent->offset], 2); return ret; } DistDouble norm(const DistVec &vec){ assert(vec.offset>=0); assert(vec.start < vec.end); DistDouble mval; mval.val = 0; pcurrent = (DistVec*)&vec; vertex_set nodes = pgraph->select(select_in_range); //for (int i=vec.start; i < vec.end; i++){ // TODO const vertex_data * data = &pgraph->vertex_data(i); //double * px = (double*)&data->pvec[0]; // mval.val += px[vec.offset]*px[vec.offset]; gather_type ret = pgraph->map_reduce_vertices(calc_norm); //} mval.val = sqrt(ret.training_rmse); return mval; } DistDouble norm(DistMat & mat){ DistVec vec(info, 0, mat.transpose, "norm"); vec = mat; return norm((const DistVec&)vec); } vec diag(DistMat & mat){ assert(info.is_square()); vec ret = zeros(info.total()); for (int i=0; i< info.total(); i++){ //TODO ret[i] = pgraph->vertex_data(i).A_ii; assert(false); } return ret; 
} int curoffset = -1; gather_type map_reduce_ortho(const graph_type::vertex_type & vertex){ gather_type ret; assert(curoffset >= 0); assert(curMat && curMat->start_offset - pcurrent->offset); ret.pvec = vec::Zero(curoffset); assert(curMat != NULL && curMat->start_offset < pcurrent->offset); //for (int i=mat.start_offset; i< current.offset; i++){ for (int i=curMat->start_offset; i< pcurrent->offset; i++){ ret.pvec[i - curMat->start_offset] = vertex.data().pvec[i] * vertex.data().pvec[pcurrent->offset]; } //printf("map_Reduce_ortho: node %d\n", vertex.id()); //std::cout<offset >= 0 && pcurrent->offset < vertex.data().pvec.size()); ret.training_rmse = pow(vertex.data().pvec[pcurrent->offset], 2); return ret; } void divide_by_sum(graph_type::vertex_type& vertex){ assert(pcurrent->offset >= 0 && pcurrent->offset < vertex.data().pvec.size()); vertex.data().pvec[pcurrent->offset] /= sum_alpha.training_rmse; } void transform_ortho(graph_type::vertex_type & vertex){ assert(curMat != NULL && curMat->start_offset < pcurrent->offset); for (int i=curMat->start_offset; i< pcurrent->offset; i++){ //assert(alphas.pvec[i-curMat->start_offset] != 0); vertex.data().pvec[pcurrent->offset] -= alphas.pvec[i-curMat->start_offset] * vertex.data().pvec[i]; } } bool selected_node(const graph_type::vertex_type& vertex){ if (info.is_square()) return true; else return ((vertex.id() >= (uint)info.get_start_node(!pcurrent->transpose)) && (vertex.id() < (uint)info.get_end_node(!pcurrent->transpose))); } double orthogonalize_vs_all(DistSlicedMat & mat, int _curoffset, double &alpha){ assert(mi.ortho_repeats >=1 && mi.ortho_repeats <= 3); curoffset = _curoffset; curMat = &mat; mi.mat_offset = mat.start_offset; INITIALIZE_TRACER(orthogonalize_vs_alltrace, "orthogonalization step - optimized"); BEGIN_TRACEPOINT(orthogonalize_vs_alltrace); bool old_debug = debug; debug = false; DistVec current = mat[curoffset]; pcurrent =¤t; mi.vec_offset = pcurrent->offset; assert(mat.start_offset <= current.offset); 
vertex_set nodes = pgraph->select(selected_node); if (curoffset > 0){ for (int j=0; j < mi.ortho_repeats; j++){ INITIALIZE_TRACER(orth1, "map reduce in ortho"); BEGIN_TRACEPOINT(orth1); alphas = pgraph->map_reduce_vertices(map_reduce_ortho, nodes); END_TRACEPOINT(orth1); //pgraph->transform_vertices(transform_ortho, nodes); mat[_curoffset] = mat[_curoffset].orthogonalize(); } //for ortho_repeast } debug = old_debug; current.debug_print(current.name); INITIALIZE_TRACER(orth2, "map reduce in ortho2"); BEGIN_TRACEPOINT(orth2); sum_alpha = pgraph->map_reduce_vertices(map_reduce_sum_power, nodes); END_TRACEPOINT(orth2); sum_alpha.training_rmse = sqrt(sum_alpha.training_rmse); alpha = sum_alpha.training_rmse; if (alpha >= 1e-10 ){ INITIALIZE_TRACER(orth3, "transform_vertices in ortho3"); BEGIN_TRACEPOINT(orth3); //pgraph->transform_vertices(divide_by_sum, nodes); mat[_curoffset] = mat[_curoffset] / alpha; END_TRACEPOINT(orth3); } END_TRACEPOINT(orthogonalize_vs_alltrace); return alpha; } DistVec& DistVec::operator/(const DistDouble & other){ assert(other.val != 0); assert(mi.d == 0); mi.d = 1/other.val; return *this; } #endif //_MATH_HPP ================================================ FILE: toolkits/collaborative_filtering/nmf.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ /** * \file * This code iplements the NMF algorithm described in the paper: * Lee, D..D., and Seung, H.S., (2001), 'Algorithms for Non-negative Matrix * Factorization', Adv. Neural Info. Proc. Syst. 13, 556-562. */ #include #include #include #include "eigen_serialization.hpp" #include #include #include "stats.hpp" typedef Eigen::VectorXd vec; typedef Eigen::MatrixXd mat_type; //when using negative node id range, we are not allowed to use //0 and 1 so we add 2. const static int SAFE_NEG_OFFSET=2; const double epsilon = 1e-16; static bool debug; int iter = 0; bool isuser(uint node){ return ((int)node) >= 0; } /** * \ingroup toolkit_matrix_pvecization * * \brief the vertex data type which contains the latent pvec. * * Each row and each column in the matrix corresponds to a different * vertex in the SGD graph. Associated with each vertex is a pvec * (vector) of latent parameters that represent that vertex. The goal * of the SGD algorithm is to find the values for these latent * parameters such that the non-zero entries in the matrix can be * predicted by taking the dot product of the row and column pvecs. */ struct vertex_data { /** * \brief A shared "constant" that specifies the number of latent * values to use. 
*/ static size_t NLATENT; /** \brief The latent pvec for this vertex */ vec pvec; double train_rmse; double validation_rmse; /** * \brief Simple default constructor which randomizes the vertex * data */ vertex_data() { if (debug) pvec = vec::Ones(NLATENT); else randomize(); train_rmse = validation_rmse = 0; } /** \brief Randomizes the latent pvec */ void randomize() { pvec.resize(NLATENT); pvec.setRandom(); } /** \brief Save the vertex data to a binary archive */ void save(graphlab::oarchive& arc) const { arc << pvec << train_rmse << validation_rmse; } /** \brief Load the vertex data from a binary archive */ void load(graphlab::iarchive& arc) { arc >> pvec >>train_rmse >> validation_rmse; } }; // end of vertex data /** * \brief The edge data stores the entry in the matrix. * * In addition the edge data nmfo stores the most recent error estimate. */ struct edge_data : public graphlab::IS_POD_TYPE { /** * \brief The type of data on the edge; * * \li *Train:* the observed value is correct and used in training * \li *Validate:* the observed value is correct but not used in training * \li *Predict:* The observed value is not correct and should not be * used in training. */ enum data_role_type { TRAIN, VALIDATE, PREDICT }; /** \brief the observed value for the edge */ float weight; /** \brief The train/validation/test designation of the edge */ data_role_type role; /** \brief basic initialization */ edge_data(float weight = 0, data_role_type role = PREDICT) : weight(weight), role(role) { } }; // end of edge data /** * \brief The graph type is defined in terms of the vertex and edge * data. */ typedef graphlab::distributed_graph graph_type; double extract_l2_error(const graph_type::edge_type & edge); /** * \brief Given a vertex and an edge return the other vertex in the * edge. */ inline graph_type::vertex_type get_other_vertex(graph_type::edge_type& edge, const graph_type::vertex_type& vertex) { return vertex.id() == edge.source().id()? 
edge.target() : edge.source(); }; // end of get_other_vertex class gather_type { public: vec pvec; double training_rmse; double validation_rmse; gather_type() { training_rmse = validation_rmse = 0; } gather_type(const vec & _pvec, double _train_rmse, double _validation_rmse){ pvec = _pvec; training_rmse = _train_rmse; validation_rmse = _validation_rmse; } void reset(){ pvec = vec::Zero(vertex_data::NLATENT); training_rmse = 0; validation_rmse = 0; } void save(graphlab::oarchive& arc) const { arc << pvec << training_rmse << validation_rmse; } void load(graphlab::iarchive& arc) { arc >> pvec >> training_rmse >> validation_rmse; } gather_type& operator+=(const gather_type& other) { pvec += other.pvec; training_rmse += other.training_rmse; validation_rmse += other.validation_rmse; return *this; } }; gather_type x1; gather_type x2; gather_type * px; bool isuser_node(const graph_type::vertex_type& vertex){ return isuser(vertex.id()); } /** * SGD vertex program type */ class nmf_vertex_program : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE{ public: /** The convergence tolerance */ static double TOLERANCE; static double MAXVAL; static double MINVAL; static bool debug; static size_t MAX_UPDATES; /** compute a missing value based on NMF algorithm */ static float nmf_predict(const vertex_data& user, const vertex_data& movie, const float rating, double & prediction){ prediction = user.pvec.dot(movie.pvec); //truncate prediction to allowed values prediction = std::min((double)prediction, nmf_vertex_program::MAXVAL); prediction = std::max((double)prediction, nmf_vertex_program::MINVAL); //return the squared error float err = rating - prediction; assert(!std::isnan(err)); return err*err; } /** The set of edges to gather along */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { //UNUSED return graphlab::ALL_EDGES; }; // end of gather_edges /** The gather function computes XtX and Xy */ gather_type gather(icontext_type& 
context, const vertex_type& vertex, edge_type& edge) const { if (edge.data().role == edge_data::TRAIN || edge.data().role == edge_data::VALIDATE){ const vertex_type other_vertex = get_other_vertex(edge, vertex); double prediction = 0; double rmse = nmf_predict(vertex.data(), other_vertex.data(), edge.data().weight, prediction); if (prediction == 0) logstream(LOG_FATAL)<<"Got into numerical error!" << std::endl; if (edge.data().role == edge_data::TRAIN) return gather_type(other_vertex.data().pvec * (edge.data().weight / prediction), rmse, 0); else //validation return gather_type(vec::Zero(vertex_data::NLATENT), 0, rmse); } return gather_type(vec::Zero(vertex_data::NLATENT), 0, 0); } // end of gather function void apply(icontext_type& context, vertex_type& vertex, const gather_type& sum) { vertex_data& vdata = vertex.data(); if (vdata.pvec.sum() != 0){ for (uint i=0; i< vertex_data::NLATENT; i++){ vdata.pvec[i] *= sum.pvec[i] / px->pvec[i]; ASSERT_NE(px->pvec[i] , 0); if (vdata.pvec[i] < epsilon) vdata.pvec[i] = epsilon; } } vdata.train_rmse = sum.training_rmse; vdata.validation_rmse = sum.validation_rmse; } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { //UNUSED return graphlab::ALL_EDGES; }; // end of scatter edges void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { //we do not schedule any more neighbors to run } static void verify_rows(graph_type::vertex_type& vertex){ if (isuser(vertex.id()) && vertex.num_out_edges() == 0) logstream(LOG_FATAL)<<"NMF algorithm can not work when the row " << vertex.id() << " of the matrix contains all zeros" << std::endl; } static gather_type pre_iter(const graph_type::vertex_type & vertex){ gather_type ret; ret.pvec = vertex.data().pvec; ret.training_rmse = vertex.data().train_rmse; ret.validation_rmse = vertex.data().validation_rmse; return ret; } static graphlab::empty signal_left(icontext_type& context, const vertex_type& vertex) { 
if(vertex.num_out_edges() > 0) context.signal(vertex); return graphlab::empty(); } // end of signal_left static graphlab::empty signal_right(icontext_type& context, const vertex_type& vertex) { if(vertex.num_in_edges() > 0) context.signal(vertex); return graphlab::empty(); } // end of signal_left }; // end of nmf vertex program gather_type count_edges(nmf_vertex_program::icontext_type & context, const graph_type::edge_type& edge) { gather_type ret; if (edge.data().role == edge_data::TRAIN){ ret.training_rmse = 1; } else if (edge.data().role == edge_data::VALIDATE){ ret.validation_rmse = 1; } if (edge.data().weight < 0) logstream(LOG_FATAL)<<"Found a negative entry in matirx row " << edge.source().id() << " with value: " << edge.data().weight << std::endl; return ret; } struct prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { return ""; } std::string save_edge(const edge_type& edge) const { if (edge.data().role != edge_data::PREDICT) return ""; std::stringstream strm; const double prediction = edge.source().data().pvec.dot(edge.target().data().pvec); strm << edge.source().id() << '\t' << -edge.target().id()-SAFE_NEG_OFFSET << '\t' << prediction << '\n'; return strm.str(); } }; // end of prediction_saver struct linear_model_saver_U { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... 
factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() > 0){ std::string ret = boost::lexical_cast(vertex.id()) + ") "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().pvec[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; struct linear_model_saver_V { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() == 0){ std::string ret = boost::lexical_cast(-vertex.id()-SAFE_NEG_OFFSET) + ") "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().pvec[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; /** * \brief The graph loader function is a line parser used for * distributed graph construction. */ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { ASSERT_FALSE(line.empty()); // Parse the line std::stringstream strm(line); graph_type::vertex_id_type source_id(-1), target_id(-1); float weight(0); strm >> source_id >> target_id; if (source_id == graph_type::vertex_id_type(-1) || target_id == graph_type::vertex_id_type(-1)){ logstream(LOG_WARNING)<<"Failed to read input line: "<< line << " in file: " << filename << " (or node id is -1). " << std::endl; return true; } // Determine the role of the data edge_data::data_role_type role = edge_data::TRAIN; if(boost::ends_with(filename,".validate")) role = edge_data::VALIDATE; else if(boost::ends_with(filename, ".predict")) role = edge_data::PREDICT; // for test files (.predict) no need to read the actual rating value. 
if(role == edge_data::TRAIN || role == edge_data::VALIDATE){ strm >> weight; if (weight < nmf_vertex_program::MINVAL || weight > nmf_vertex_program::MAXVAL) logstream(LOG_FATAL)<<"Rating values should be between " << nmf_vertex_program::MINVAL << " and " << nmf_vertex_program::MAXVAL << ". Got value: " << weight << " [ user: " << source_id << " to item: " < engine_type; int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "Compute the ALS factorization of a matrix."; graphlab::command_line_options clopts(description); std::string input_dir; std::string predictions; std::string exec_type = "synchronous"; clopts.attach_option("matrix", input_dir, "The directory containing the matrix file"); clopts.add_positional("matrix"); clopts.attach_option("D", vertex_data::NLATENT, "Number of latent parameters to use."); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); clopts.attach_option("max_iter", nmf_vertex_program::MAX_UPDATES, "The maxumum number of udpates allowed for a vertex"); clopts.attach_option("debug", nmf_vertex_program::debug, "debug - additional verbose info"); clopts.attach_option("maxval", nmf_vertex_program::MAXVAL, "max allowed value"); clopts.attach_option("minval", nmf_vertex_program::MINVAL, "min allowed value"); clopts.attach_option("predictions", predictions, "The prefix (folder and filename) to save predictions."); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." << std::endl; clopts.print_description(); return EXIT_FAILURE; } debug = nmf_vertex_program::debug; graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Loading graph." 
<< std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); dc.cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. Finished in " << timer.current_time() << std::endl; if (!graph.num_edges() || !graph.num_vertices()) logstream(LOG_FATAL)<< "Failed to load graph. Check your input path: " << input_dir << std::endl; dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; dc.cout() << "Creating engine" << std::endl; engine_type engine(dc, graph, exec_type, clopts); // Run the NMF --------------------------------------------------------- dc.cout() << "Running NMF" << std::endl; dc.cout() << "(C) Code by Danny Bickson, CMU " << std::endl; dc.cout() << "Please send bug reports to danny.bickson@gmail.com" << std::endl; dc.cout() << "Time Training Validation" <(count_edges); dc.cout()<<"Training edges: " << edge_count.training_rmse << " validation edges: " << edge_count.validation_rmse << std::endl; graphlab::vertex_set left = graph.select(isuser_node); graphlab::vertex_set right = ~left; graph.transform_vertices(nmf_vertex_program::verify_rows, left); graphlab::timer mytimer; 
mytimer.start(); for (uint j=0; j< nmf_vertex_program::MAX_UPDATES; j++){ x1 = graph.map_reduce_vertices(nmf_vertex_program::pre_iter,right); px = &x1; for (int i=0; i< (int)vertex_data::NLATENT; i++) ASSERT_NE(px->pvec[i], 0); dc.cout()<< std::setw(8) << mytimer.current_time() << " " << sqrt(x1.training_rmse/edge_count.training_rmse); if (edge_count.validation_rmse > 0) dc.cout() << " " << std::setw(8) << sqrt(x1.validation_rmse/edge_count.validation_rmse) << std::endl; else dc.cout() << std::endl; engine.map_reduce_vertices(nmf_vertex_program::signal_left); engine.start(); x1.reset(); x2 = graph.map_reduce_vertices(nmf_vertex_program::pre_iter,left); px = &x2; engine.map_reduce_vertices(nmf_vertex_program::signal_right); engine.start(); x2.reset(); } const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime << std::endl << "Updates executed: " << engine.num_updates() << std::endl << "Update Rate (updates/second): " << engine.num_updates() / runtime << std::endl; // Make predictions --------------------------------------------------------- if(!predictions.empty()) { std::cout << "Saving predictions" << std::endl; const bool gzip_output = false; const bool save_vertices = false; const bool save_edges = true; const size_t threads_per_machine = 1; //save the predictions graph.save(predictions, prediction_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); //save the linear model graph.save(predictions + ".U", linear_model_saver_U(), gzip_output, save_edges, save_vertices, threads_per_machine); graph.save(predictions + ".V", linear_model_saver_V(), gzip_output, save_edges, save_vertices, threads_per_machine); } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/collaborative_filtering/printouts.hpp ================================================ 
#ifndef PRINTOUTS #define PRINTOUTS #define MAX_PRINTOUT_LEN 25 bool absolute_value = true; inline double fabs2(double val){ if (absolute_value) return fabs(val); else return val; } gather_type collect_vec(const graph_type::vertex_type& vertex) { assert(pcurrent); assert(pcurrent->offset >= 0 && pcurrent->offset < vertex.data().pvec.size()); int len=std::min(pcurrent->end-pcurrent->start,MAX_PRINTOUT_LEN); assert(len > 0); gather_type ret; ret.pvec = vec::Zero(len); if (vertex.id() >= (uint)pcurrent->start && vertex.id() < (uint)pcurrent->start +len) ret.pvec[vertex.id() - pcurrent->start] = vertex.data().pvec[pcurrent->offset]; return ret; } void print_vec(const char * name, const DistVec & vec, bool high = false){ if (!debug) return; int i; printf("%s[%d]\n", name, vec.offset); pcurrent = (DistVec*)&vec; int len=std::min(vec.end-vec.start,MAX_PRINTOUT_LEN); gather_type ret = pgraph->map_reduce_vertices(collect_vec); for (i= 0; i< len; i++){ if (high) printf("%15.15lg ", fabs2(ret.pvec[i])); else printf("%.5lg ", fabs2(ret.pvec[i])); } printf("\n"); } void print_vec(const char * name, const vec & pvec, bool high = false){ if (!debug) return; printf("%s\n", name); for (int i= 0; i< std::min((int)pvec.size(), MAX_PRINTOUT_LEN); i++){ if (high) printf("%15.15lg ", fabs2(pvec[i])); else printf("%.5lg ", fabs2(pvec[i])); } printf("\n"); } void print_mat(const char * name, const mat & pmat, bool high = false){ if (!debug) return; printf("%s\n", name); mat pmat2 = transpose((mat&)pmat); if (pmat2.cols() == 1) pmat2 = pmat2.transpose(); for (int i= 0; i< std::min((int)pmat2.rows(), MAX_PRINTOUT_LEN); i++){ for (int j=0; j< std::min((int)pmat2.cols(), MAX_PRINTOUT_LEN); j++){ if (high) printf("%15.15lg ", fabs2(get_val(pmat2, i, j))); else printf("%.5lg ", fabs2(get_val(pmat2, i, j))); } printf("\n"); } } void print_vec_pos(std::string name, vec & v, int i){ if (!debug) return; if (i == -1) printf("%s\n", name.c_str()); else { printf("%s[%d]: %.5lg\n", name.c_str(), i, 
fabs(v[i])); return; } for (int j=0; j< std::min((int)v.size(),MAX_PRINTOUT_LEN); j++){ printf("%.5lg", fabs2(v(j))); if (v.size() > 1) printf(" "); } printf("\n"); } #define PRINT_VEC(a) print_vec(#a,a,0) #define PRINT_VEC2(a,b) print_vec(a,b,0) #define PRINT_VEC3(a,b,c) print_vec_pos(a,b,c) #define PRINT_VEC2_HIGH(a,i) print_vec(#a,a[i],1) #define PRINT_INT(a) if (debug) printf("%s: %d\n", #a, a); #define PRINT_NAMED_INT(a,b) if (debug) printf("%s: %d\n",a, b); #define PRINT_DBL(a) if (debug) printf("%s: %.5lg\n", #a, a); #define PRINT_NAMED_DBL(a,b) if (debug) printf("%s: %.5lg\n", a, b); #define PRINT_MAT(a) print_mat(#a, a, 0); #define PRINT_MAT2(a,b) print_mat(a,b,0); #endif ================================================ FILE: toolkits/collaborative_filtering/sgd.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * \file * * \brief The main file for the SGD matrix factorization algorithm. * * This file contains the main body of the SGD matrix factorization * algorithm. */ #include #include #include #include "eigen_serialization.hpp" #include typedef Eigen::VectorXd vec_type; typedef Eigen::MatrixXd mat_type; //when using negative node id range, we are not allowed to use //0 and 1 so we add 2. 
const static int SAFE_NEG_OFFSET=2; static bool debug; int iter = 0; bool isuser(uint node){ return ((int)node) >= 0; } /** * \ingroup toolkit_matrix_pvecization * * \brief the vertex data type which contains the latent pvec. * * Each row and each column in the matrix corresponds to a different * vertex in the SGD graph. Associated with each vertex is a pvec * (vector) of latent parameters that represent that vertex. The goal * of the SGD algorithm is to find the values for these latent * parameters such that the non-zero entries in the matrix can be * predicted by taking the dot product of the row and column pvecs. */ struct vertex_data { /** * \brief A shared "constant" that specifies the number of latent * values to use. */ static size_t NLATENT; /** \brief The latent pvec for this vertex */ vec_type pvec; int nupdates; /** * \brief Simple default constructor which randomizes the vertex * data */ vertex_data() : nupdates(0) { if (debug) pvec = vec_type::Ones(NLATENT); else randomize(); } /** \brief Randomizes the latent pvec */ void randomize() { pvec.resize(NLATENT); pvec.setRandom(); } /** \brief Save the vertex data to a binary archive */ void save(graphlab::oarchive& arc) const { arc << nupdates << pvec; } /** \brief Load the vertex data from a binary archive */ void load(graphlab::iarchive& arc) { arc >> nupdates >> pvec; } }; // end of vertex data /** * \brief The edge data stores the entry in the matrix. * * In addition the edge data sgdo stores the most recent error estimate. */ struct edge_data : public graphlab::IS_POD_TYPE { /** * \brief The type of data on the edge; * * \li *Train:* the observed value is correct and used in training * \li *Validate:* the observed value is correct but not used in training * \li *Predict:* The observed value is not correct and should not be * used in training. 
*/ enum data_role_type { TRAIN, VALIDATE, PREDICT }; /** \brief the observed value for the edge */ float obs; /** \brief The train/validation/test designation of the edge */ data_role_type role; /** \brief basic initialization */ edge_data(float obs = 0, data_role_type role = PREDICT) : obs(obs), role(role) { } }; // end of edge data /** * \brief The graph type is defined in terms of the vertex and edge * data. */ typedef graphlab::distributed_graph graph_type; #include "implicit.hpp" stats_info count_edges(const graph_type::edge_type & edge){ stats_info ret; if (edge.data().role == edge_data::TRAIN) ret.training_edges = 1; else if (edge.data().role == edge_data::VALIDATE) ret.validation_edges = 1; ret.max_user = (size_t)edge.source().id(); ret.max_item = (-edge.target().id()-SAFE_NEG_OFFSET); return ret; } double extract_l2_error(const graph_type::edge_type & edge); /** * \brief Given a vertex and an edge return the other vertex in the * edge. */ inline graph_type::vertex_type get_other_vertex(graph_type::edge_type& edge, const graph_type::vertex_type& vertex) { return vertex.id() == edge.source().id()? 
edge.target() : edge.source(); }; // end of get_other_vertex /** * */ class gather_type { public: /** * \brief Stores the current sum of nbr.pvec.transpose() * * nbr.pvec */ /** * \brief Stores the current sum of nbr.pvec * edge.obs */ vec_type pvec; /** \brief basic default constructor */ gather_type() { } /** * \brief This constructor computes XtX and Xy and stores the result * in XtX and Xy */ gather_type(const vec_type& X) { pvec = X; } // end of constructor for gather type /** \brief Save the values to a binary archive */ void save(graphlab::oarchive& arc) const { arc << pvec; } /** \brief Read the values from a binary archive */ void load(graphlab::iarchive& arc) { arc >> pvec; } /** */ gather_type& operator+=(const gather_type& other) { if (pvec.size() == 0){ pvec = other.pvec; return *this; } else if (other.pvec.size() == 0) return *this; pvec += other.pvec; return *this; } // end of operator+= }; // end of gather type typedef vec_type message_type; bool isuser_node(const graph_type::vertex_type& vertex){ return isuser(vertex.id()); } /** * SGD vertex program type */ class sgd_vertex_program : public graphlab::ivertex_program { public: /** The convergence tolerance */ static double TOLERANCE; static double LAMBDA; static double GAMMA; static double MAXVAL; static double MINVAL; static double STEP_DEC; static bool debug; static size_t MAX_UPDATES; vec_type pmsg; void save(graphlab::oarchive& arc) const { arc << pmsg; } /** \brief Load the vertex data from a binary archive */ void load(graphlab::iarchive& arc) { arc >> pmsg; } /** The set of edges to gather along */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { vec_type delta, other_delta; //this is user node if (vertex.num_in_edges() == 0){ vertex_type my_vertex(vertex); //get a copy of the item node vertex_type 
other_vertex(get_other_vertex(edge, vertex)); //compute the current prediction by computing a dot production of user and item nodes double pred = vertex.data().pvec.dot(other_vertex.data().pvec); //truncte predictions into allowed range pred = std::min(pred, sgd_vertex_program::MAXVAL); pred = std::max(pred, sgd_vertex_program::MINVAL); //compute the prediction error const float err = edge.data().obs - pred; if (debug) std::cout<<"entering edge " << (int)edge.source().id() << ":" << (int)edge.target().id() << " err: " << err << " rmse: " << err*err < TOLERANCE && other_vertex.data().nupdates < MAX_UPDATES) context.signal(other_vertex, other_delta); } } return gather_type(delta); } // end of gather function void init(icontext_type& context, const vertex_type& vertex, const message_type& msg) { //if this is an item node, store the change in the gradient (sum of changes) to be //applied in the apply() function if (vertex.num_in_edges() > 0){ pmsg = msg; } } void apply(icontext_type& context, vertex_type& vertex, const gather_type& sum) { vertex_data& vdata = vertex.data(); //this is a user node, update the gradient using the comulative sum of gradient updates computed in gather if (sum.pvec.size() > 0){ vdata.pvec += sum.pvec; assert(vertex.num_in_edges() == 0); } //if this is an item node, update the gradient using the received sum from the init() function else if (pmsg.size() > 0){ vdata.pvec += pmsg; assert(vertex.num_out_edges() == 0); } ++vdata.nupdates; } // end of apply /** The edges to scatter along */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of scatter edges /** Scatter reschedules neighbors */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { edge_data& edata = edge.data(); if(edata.role == edge_data::TRAIN) { const vertex_type other_vertex = get_other_vertex(edge, vertex); // Reschedule neighbors 
------------------------------------------------ if(other_vertex.data().nupdates < MAX_UPDATES) context.signal(other_vertex, vec_type::Zero(vertex_data::NLATENT)); } } // end of scatter function /** * \brief Signal all vertices on one side of the bipartite graph */ static graphlab::empty signal_left(icontext_type& context, vertex_type& vertex) { if(vertex.num_out_edges() > 0) context.signal(vertex, vec_type::Zero(vertex_data::NLATENT)); return graphlab::empty(); } // end of signal_left }; // end of sgd vertex program struct error_aggregator : public graphlab::IS_POD_TYPE { typedef sgd_vertex_program::icontext_type icontext_type; typedef graph_type::edge_type edge_type; double train_error, validation_error; error_aggregator() : train_error(0), validation_error(0){ } error_aggregator& operator+=(const error_aggregator& other) { train_error += other.train_error; assert(!std::isnan(train_error)); validation_error += other.validation_error; return *this; } static error_aggregator map(icontext_type& context, const graph_type::edge_type& edge) { error_aggregator agg; if (edge.data().role == edge_data::TRAIN){ if (isuser_node(edge.source())) agg.train_error = extract_l2_error(edge); assert(!std::isnan(agg.train_error)); } else if (edge.data().role == edge_data::VALIDATE){ if (isuser_node(edge.source())) agg.validation_error = extract_l2_error(edge); } return agg; } static void finalize(icontext_type& context, const error_aggregator& agg) { iter++; if (iter%2 == 0) return; const double train_error = std::sqrt(agg.train_error / info.training_edges); assert(!std::isnan(train_error)); context.cout() << std::setw(8) << context.elapsed_seconds() << " " << std::setw(8) << train_error; if(info.validation_edges > 0) { const double validation_error = std::sqrt(agg.validation_error / info.validation_edges); context.cout() << " " << std::setw(8) << validation_error; } context.cout() << std::endl; sgd_vertex_program::GAMMA *= sgd_vertex_program::STEP_DEC; } }; // end of error 
aggregator /** * \brief Given an edge compute the error associated with that edge */ double extract_l2_error(const graph_type::edge_type & edge) { double pred = edge.source().data().pvec.dot(edge.target().data().pvec); pred = std::min(sgd_vertex_program::MAXVAL, pred); pred = std::max(sgd_vertex_program::MINVAL, pred); double rmse = (edge.data().obs - pred) * (edge.data().obs - pred); assert(rmse <= pow(sgd_vertex_program::MAXVAL-sgd_vertex_program::MINVAL,2)); return rmse; } // end of extract_l2_error struct prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { return ""; } std::string save_edge(const edge_type& edge) const { if (edge.data().role != edge_data::PREDICT) return ""; std::stringstream strm; const double prediction = edge.source().data().pvec.dot(edge.target().data().pvec); strm << edge.source().id() << '\t' << -edge.target().id()-SAFE_NEG_OFFSET << '\t' << prediction << '\n'; return strm.str(); } }; // end of prediction_saver struct linear_model_saver_U { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() > 0){ std::string ret = boost::lexical_cast(vertex.id()) + ") "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().pvec[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; struct linear_model_saver_V { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... 
factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() == 0){ std::string ret = boost::lexical_cast(-vertex.id()-SAFE_NEG_OFFSET) + ") "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().pvec[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; /** * \brief The graph loader function is a line parser used for * distributed graph construction. */ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { // Parse the line std::stringstream strm(line); graph_type::vertex_id_type source_id(-1), target_id(-1); float obs(0); strm >> source_id >> target_id; if (source_id == graph_type::vertex_id_type(-1) || target_id == graph_type::vertex_id_type(-1)){ logstream(LOG_WARNING)<<"Failed to read input line: "<< line << " in file: " << filename << " (or node id is -1). " << std::endl; return true; } // Determine the role of the data edge_data::data_role_type role = edge_data::TRAIN; if(boost::ends_with(filename,".validate")) role = edge_data::VALIDATE; else if(boost::ends_with(filename, ".predict")) role = edge_data::PREDICT; if(role == edge_data::TRAIN || role == edge_data::VALIDATE){ strm >> obs; if (obs < sgd_vertex_program::MINVAL || obs > sgd_vertex_program::MAXVAL){ logstream(LOG_WARNING)<<"Rating values should be between " << sgd_vertex_program::MINVAL << " and " << sgd_vertex_program::MAXVAL << ". 
Got value: " << obs << " [ user: " << source_id << " to item: " < engine_type; int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "Compute the SGD factorization of a matrix."; graphlab::command_line_options clopts(description); std::string input_dir; std::string predictions; size_t interval = 0; std::string exec_type = "synchronous"; clopts.attach_option("matrix", input_dir, "The directory containing the matrix file"); clopts.add_positional("matrix"); clopts.attach_option("D", vertex_data::NLATENT, "Number of latent parameters to use."); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); clopts.attach_option("max_iter", sgd_vertex_program::MAX_UPDATES, "The maxumum number of udpates allowed for a vertex"); clopts.attach_option("lambda", sgd_vertex_program::LAMBDA, "SGD regularization weight"); clopts.attach_option("gamma", sgd_vertex_program::GAMMA, "SGD step size"); clopts.attach_option("debug", sgd_vertex_program::debug, "debug - additional verbose info"); clopts.attach_option("tol", sgd_vertex_program::TOLERANCE, "residual termination threshold"); clopts.attach_option("maxval", sgd_vertex_program::MAXVAL, "max allowed value"); clopts.attach_option("minval", sgd_vertex_program::MINVAL, "min allowed value"); clopts.attach_option("step_dec", sgd_vertex_program::STEP_DEC, "multiplicative step decrement"); clopts.attach_option("interval", interval, "The time in seconds between error reports"); clopts.attach_option("predictions", predictions, "The prefix (folder and filename) to save predictions."); parse_implicit_command_line(clopts); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." 
<< std::endl; clopts.print_description(); return EXIT_FAILURE; } debug = sgd_vertex_program::debug; // omp_set_num_threads(clopts.get_ncpus()); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); dc.cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; if (dc.procid() == 0) add_implicit_edges(implicitratingtype, graph, dc); dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. Finished in " << timer.current_time() << std::endl; if (!graph.num_edges() || !graph.num_vertices()) logstream(LOG_FATAL)<< "Failed to load graph. Check your input path: " << input_dir << std::endl; dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; dc.cout() << "Creating engine" << std::endl; engine_type engine(dc, graph, exec_type, clopts); // Add error reporting to the engine const bool success = engine.add_edge_aggregator ("error", error_aggregator::map, error_aggregator::finalize) && engine.aggregate_periodic("error", interval); ASSERT_TRUE(success); // Signal all vertices on the vertices on the 
left (libersgd) engine.map_reduce_vertices(sgd_vertex_program::signal_left); info = graph.map_reduce_edges(count_edges); dc.cout()<<"Training edges: " << info.training_edges << " validation edges: " << info.validation_edges << std::endl; // Run the PageRank --------------------------------------------------------- dc.cout() << "Running SGD" << std::endl; dc.cout() << "(C) Code by Danny Bickson, CMU " << std::endl; dc.cout() << "Please send bug reports to danny.bickson@gmail.com" << std::endl; dc.cout() << "Time Training Validation" < #define EIGEN_DONT_PARALLELIZE //eigen parallel for loop interfers with ours. #include #include #include #include #include // This file defines the serialization code for the eigen types. #include "eigen_serialization.hpp" #include "eigen_wrapper.hpp" #include "stats.hpp" #include #include #include const int SAFE_NEG_OFFSET = 2; //add 2 to negative node id //to prevent -0 and -1 which arenot allowed /** * \brief We use the eigen library's vector type to represent * mathematical vectors. */ typedef Eigen::VectorXd vec; typedef Eigen::VectorXi ivec; /** * \brief We use the eigen library's matrix type to represent * matrices. */ typedef Eigen::MatrixXd mat; #include "cosamp.hpp" //algorithm run modes enum { SPARSE_USR_FACTOR = 1, SPARSE_ITM_FACTOR = 2, SPARSE_BOTH_FACTORS = 3 }; int algorithm = SPARSE_USR_FACTOR; double user_sparsity = 0.8; double movie_sparsity = 0.8; /** * \ingroup toolkit_matrix_factorization * * \brief the vertex data type which contains the latent factor. * * Each row and each column in the matrix corresponds to a different * vertex in the ALS graph. Associated with each vertex is a factor * (vector) of latent parameters that represent that vertex. The goal * of the ALS algorithm is to find the values for these latent * parameters such that the non-zero entries in the matrix can be * predicted by taking the dot product of the row and column factors. 
*/ struct vertex_data { /** * \brief A shared "constant" that specifies the number of latent * values to use. */ static size_t NLATENT; /** \brief The number of times this vertex has been updated. */ uint32_t nupdates; /** \brief The most recent L1 change in the factor value */ float residual; //! how much the latent value has changed /** \brief The latent factor for this vertex */ vec factor; /** * \brief Simple default constructor which randomizes the vertex * data */ vertex_data() : nupdates(0), residual(1) { randomize(); } /** \brief Randomizes the latent factor */ void randomize() { factor.resize(NLATENT); factor.setRandom(); } /** \brief Save the vertex data to a binary archive */ void save(graphlab::oarchive& arc) const { arc << nupdates << residual << factor; } /** \brief Load the vertex data from a binary archive */ void load(graphlab::iarchive& arc) { arc >> nupdates >> residual >> factor; } }; // end of vertex data size_t vertex_data::NLATENT = 20; /** * \brief The edge data stores the entry in the matrix. * * In addition the edge data also stores the most recent error estimate. */ struct edge_data : public graphlab::IS_POD_TYPE { /** * \brief The type of data on the edge; * * \li *Train:* the observed value is correct and used in training * \li *Validate:* the observed value is correct but not used in training * \li *Predict:* The observed value is not correct and should not be * used in training. */ enum data_role_type { TRAIN, VALIDATE, PREDICT }; /** \brief the observed value for the edge */ float obs; /** \brief The train/validation/test designation of the edge */ data_role_type role; /** \brief basic initialization */ edge_data(float obs = 0, data_role_type role = PREDICT) : obs(obs), role(role) { } }; // end of edge data /** * \brief The graph type is defined in terms of the vertex and edge * data. 
*/ typedef graphlab::distributed_graph graph_type; #include "implicit.hpp" stats_info count_edges(const graph_type::edge_type & edge){ stats_info ret; if (edge.data().role == edge_data::TRAIN) ret.training_edges = 1; else if (edge.data().role == edge_data::VALIDATE) ret.validation_edges = 1; ret.max_user = (size_t)edge.source().id(); ret.max_item = (size_t)edge.target().id(); return ret; } /** * \brief Given a vertex and an edge return the other vertex in the * edge. */ inline graph_type::vertex_type get_other_vertex(graph_type::edge_type& edge, const graph_type::vertex_type& vertex) { return vertex.id() == edge.source().id()? edge.target() : edge.source(); }; // end of get_other_vertex /** * \brief The gather type used to construct XtX and Xty needed for the ALS * update * * To compute the ALS update we need to compute the sum of * \code * sum: XtX = nbr.factor.transpose() * nbr.factor * sum: Xy = nbr.factor * edge.obs * \endcode * For each of the neighbors of a vertex. * * To do this in the Gather-Apply-Scatter model the gather function * computes and returns a pair consisting of XtX and Xy which are then * added. The gather type represents that tuple and provides the * necessary gather_type::operator+= operation. 
* */ class gather_type { public: /** * \brief Stores the current sum of nbr.factor.transpose() * * nbr.factor */ mat XtX; /** * \brief Stores the current sum of nbr.factor * edge.obs */ vec Xy; /** \brief basic default constructor */ gather_type() { } /** * \brief This constructor computes XtX and Xy and stores the result * in XtX and Xy */ gather_type(const vec& X, const double y) : XtX(X.size(), X.size()), Xy(X.size()) { XtX = X * X.transpose(); Xy = X * y; } // end of constructor for gather type /** \brief Save the values to a binary archive */ void save(graphlab::oarchive& arc) const { arc << XtX << Xy; } /** \brief Read the values from a binary archive */ void load(graphlab::iarchive& arc) { arc >> XtX >> Xy; } /** * \brief Computes XtX += other.XtX and Xy += other.Xy updating this * tuples value */ gather_type& operator+=(const gather_type& other) { if(other.Xy.size() == 0) { ASSERT_EQ(other.XtX.rows(), 0); ASSERT_EQ(other.XtX.cols(), 0); } else { if(Xy.size() == 0) { ASSERT_EQ(XtX.rows(), 0); ASSERT_EQ(XtX.cols(), 0); XtX = other.XtX; Xy = other.Xy; } else { XtX.triangularView() += other.XtX; Xy += other.Xy; } } return *this; } // end of operator+= }; // end of gather type /** * \brief ALS vertex program implements the alternating least squares * algorithm in the Gather-Apply-Scatter abstraction. 
* * The ALS update treats adjacent vertices (rows or columns) as "X" * (independent) values and the edges (matrix entries) as observed "y" * (dependent) values and then updates the current vertex value as a * weight "w" such that: * * y = X * w + noise * * This is accomplished using the following equation: * * w = inv(X' * X) * (X * y) * * We implement this in the Gather-Apply-Scatter model by: * * 1) Gather: returns the tuple (X' * X, X * y) * Sum: (aX' * aX, aX * ay) + (bX' * bX, bX * by) = * (aX' * aX + bX' * bX, aX * ay + bX * by) * * 2) Apply: Solves inv(X' * X) * (X * y) * * 3) Scatter: schedules the update of adjacent vertices if this * vertex has changed sufficiently and the edge is not well * predicted. * * */ class als_vertex_program : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: /** The convergence tolerance */ static double TOLERANCE; static double LAMBDA; static size_t MAX_UPDATES; static double MAXVAL; static double MINVAL; /** The set of edges to gather along */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges /** The gather function computes XtX and Xy */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { if(edge.data().role == edge_data::TRAIN) { const vertex_type other_vertex = get_other_vertex(edge, vertex); return gather_type(other_vertex.data().factor, edge.data().obs); } else return gather_type(); } // end of gather function /** apply collects the sum of XtX and Xy */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& sum) { // Get and reset the vertex data vertex_data& vdata = vertex.data(); // Determine the number of neighbors. 
Each vertex has only in or // out edges depending on which side of the graph it is located if(sum.Xy.size() == 0) { vdata.residual = 0; ++vdata.nupdates; return; } mat XtX = sum.XtX; vec Xy = sum.Xy; // Add regularization for(int i = 0; i < XtX.rows(); ++i) XtX(i,i) += LAMBDA; // /nneighbors; // Solve the least squares problem using eigen ---------------------------- const vec old_factor = vdata.factor; long nodeid = (long)vertex.id(); bool isuser = nodeid >= 0; if (algorithm == SPARSE_BOTH_FACTORS || (algorithm == SPARSE_USR_FACTOR && isuser) || (algorithm == SPARSE_ITM_FACTOR && !isuser)){ double sparsity_level = 1.0; if (isuser) sparsity_level -= user_sparsity; else sparsity_level -= movie_sparsity; vdata.factor = CoSaMP(XtX, Xy, ceil(sparsity_level*(double)vertex_data::NLATENT), 10, 1e-4, vertex_data::NLATENT); } else vdata.factor = XtX.selfadjointView().ldlt().solve(Xy); // Compute the residual change in the factor factor ----------------------- vdata.residual = (vdata.factor - old_factor).cwiseAbs().sum() / XtX.rows(); ++vdata.nupdates; } // end of apply /** The edges to scatter along */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of scatter edges /** Scatter reschedules neighbors */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { edge_data& edata = edge.data(); if(edata.role == edge_data::TRAIN) { const vertex_type other_vertex = get_other_vertex(edge, vertex); const vertex_data& vdata = vertex.data(); const vertex_data& other_vdata = other_vertex.data(); const double pred = vdata.factor.dot(other_vdata.factor); const float error = std::fabs(edata.obs - pred); const double priority = (error * vdata.residual); // Reschedule neighbors ------------------------------------------------ if( priority > TOLERANCE && other_vdata.nupdates < MAX_UPDATES) context.signal(other_vertex, priority); } } // end of scatter function /** * \brief Signal all 
vertices on one side of the bipartite graph */ static graphlab::empty signal_left(icontext_type& context, const vertex_type& vertex) { if(vertex.num_out_edges() > 0) context.signal(vertex); return graphlab::empty(); } // end of signal_left }; // end of als vertex program /** * \brief The graph loader function is a line parser used for * distributed graph construction. */ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { ASSERT_FALSE(line.empty()); namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; namespace phoenix = boost::phoenix; // Determine the role of the data edge_data::data_role_type role = edge_data::TRAIN; if(boost::ends_with(filename,".validate")) role = edge_data::VALIDATE; else if(boost::ends_with(filename, ".predict")) role = edge_data::PREDICT; // Parse the line graph_type::vertex_id_type source_id(-1), target_id(-1); float obs(0); const bool success = qi::phrase_parse (line.begin(), line.end(), // Begin grammar ( qi::ulong_[phoenix::ref(source_id) = qi::_1] >> -qi::char_(',') >> qi::ulong_[phoenix::ref(target_id) = qi::_1] >> -(-qi::char_(',') >> qi::float_[phoenix::ref(obs) = qi::_1]) ) , // End grammar ascii::space); if(!success) return false; if(role == edge_data::TRAIN || role == edge_data::VALIDATE){ if (obs < als_vertex_program::MINVAL || obs > als_vertex_program::MAXVAL) logstream(LOG_FATAL)<<"Rating values should be between " << als_vertex_program::MINVAL << " and " << als_vertex_program::MAXVAL << ". Got value: " << obs << " [ user: " << source_id << " to item: " < 0) { const double validation_error = std::sqrt(agg.validation_error / info.validation_edges); context.cout() << "\t" << validation_error; } context.cout() << std::endl; } }; // end of error aggregator /** * \brief The prediction saver is used by the graph.save routine to * output the final predictions back to the filesystem. 
*/ struct prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { return ""; //nop } std::string save_edge(const edge_type& edge) const { if(edge.data().role == edge_data::PREDICT) { std::stringstream strm; const double prediction = edge.source().data().factor.dot(edge.target().data().factor); strm << edge.source().id() << '\t'; strm << (-edge.target().id() - SAFE_NEG_OFFSET) << '\t'; strm << prediction << '\n'; return strm.str(); } else return ""; } }; // end of prediction_saver struct linear_model_saver_U { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() > 0){ std::string ret = boost::lexical_cast(vertex.id()) + " "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().factor[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; struct linear_model_saver_V { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() == 0){ std::string ret = boost::lexical_cast(-vertex.id()-SAFE_NEG_OFFSET) + ") "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().factor[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; /** * \brief The engine type used by the ALS matrix factorization * algorithm. * * The ALS matrix factorization algorithm currently uses the * synchronous engine. However we plan to add support for alternative * engines in the future. 
*/ typedef graphlab::omni_engine engine_type; int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "Compute the ALS factorization of a matrix."; graphlab::command_line_options clopts(description); std::string input_dir, output_dir; std::string predictions; size_t interval = 1; std::string exec_type = "synchronous"; clopts.attach_option("matrix", input_dir, "The directory containing the matrix file"); clopts.add_positional("matrix"); clopts.attach_option("D", vertex_data::NLATENT, "Number of latent parameters to use."); clopts.attach_option("max_iter", als_vertex_program::MAX_UPDATES, "The maxumum number of udpates allowed for a vertex"); clopts.attach_option("lambda", als_vertex_program::LAMBDA, "ALS regularization weight"); clopts.attach_option("tol", als_vertex_program::TOLERANCE, "residual termination threshold"); clopts.attach_option("maxval", als_vertex_program::MAXVAL, "max allowed value"); clopts.attach_option("minval", als_vertex_program::MINVAL, "min allowed value"); clopts.attach_option("interval", interval, "The time in seconds between error reports"); clopts.attach_option("predictions", predictions, "The prefix (folder and filename) to save predictions."); clopts.attach_option("user_sparsity", user_sparsity, "sparsity of user factors"); clopts.attach_option("movie_sparsity", movie_sparsity, "sparsity of item factors"); clopts.attach_option("algorithm", algorithm, "run mode. 1 = SPARSE_USR_FACTOR, 2 = SPARSE_ITM_FACTOR, 3 = SPARSE_BOTH_FACTORS"); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); clopts.attach_option("output", output_dir, "Output results"); parse_implicit_command_line(clopts); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." 
<< std::endl; clopts.print_description(); return EXIT_FAILURE; } if (user_sparsity < 0.5 || user_sparsity >= 1) logstream(LOG_FATAL)<<"Sparsity level should be [0.5,1). Please run again using --user_sparsity=XX in this range" << std::endl; if (movie_sparsity < 0.5 || movie_sparsity >= 1) logstream(LOG_FATAL)<<"Sparsity level should be [0.5,1). Please run again using --movie_sparsity=XX in this range" << std::endl; if (algorithm != SPARSE_USR_FACTOR && algorithm != SPARSE_BOTH_FACTORS && algorithm != SPARSE_ITM_FACTOR) logstream(LOG_FATAL)<<"Algorithm should be 1 for SPARSE_USR_FACTOR, 2 for SPARSE_ITM_FACTOR and 3 for SPARSE_BOTH_FACTORS" << std::endl; ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); dc.cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; if (dc.procid() == 0) add_implicit_edges(implicitratingtype, graph, dc); dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. Finished in " << timer.current_time() << std::endl; if (!graph.num_edges() || !graph.num_vertices()) logstream(LOG_FATAL)<< "Failed to load graph. 
Check your input path: " << input_dir << std::endl; dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; dc.cout() << "Creating engine" << std::endl; engine_type engine(dc, graph, exec_type, clopts); // Add error reporting to the engine const bool success = engine.add_edge_aggregator ("error", error_aggregator::map, error_aggregator::finalize) && engine.aggregate_periodic("error", interval); ASSERT_TRUE(success); // Signal all vertices on the vertices on the left (liberals) engine.map_reduce_vertices(als_vertex_program::signal_left); info = graph.map_reduce_edges(count_edges); dc.cout()<<"Training edges: " << info.training_edges << " validation edges: " << info.validation_edges << std::endl; dc.cout() << "Running Sparse-ALS" << std::endl; dc.cout() << "(C) Code by Danny Bickson, CMU " << std::endl; dc.cout() << "Please send bug reports to danny.bickson@gmail.com" << std::endl; dc.cout() << "Time Training Validation" <> validation_edges >> training_edges >> max_user >> max_item; } }; stats_info info; #endif //TK_STATS ================================================ FILE: toolkits/collaborative_filtering/svd.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * * Implementation of the Lanczos algorithm, as given in: * http://www.grycap.upv.es/slepc/documentation/reports/str8.pdf * (Restarted Lanczos Bidiagonalization for the SVD in SLEPc) * * Code written by Danny Bickson, CMU, June 2011 * */ #include "eigen_wrapper.hpp" #include "types.hpp" #include "eigen_serialization.hpp" #include #include //when using negative node id range, we are not allowed to use //0 and 1 so we add 2. int iter = 0; //LANCZOS VARIABLES int max_iter = 10; int actual_vector_len = 0; int nv = 0; int nsv = 0; double tol = 1e-8; bool finished = false; double ortho_repeats = 3; bool update_function = false; bool save_vectors = false; bool use_ids = true; std::string datafile; std::string vecfile; int unittest; int nodes = 0; int data_size = 0; std::string predictions; int rows = -1, cols = -1; bool quiet = false; int nconv = 0; int n = 0; int kk = 0; bool binary = false; //if true, all edges = 1 mat a,PT; bool v_vector = false; int input_file_offset = 0; //if set to non zero, each row/col id will be reduced the input_file_offset vec singular_values; DECLARE_TRACER(svd_bidiagonal); DECLARE_TRACER(svd_error_estimate); DECLARE_TRACER(svd_error2); DECLARE_TRACER(matproduct); DECLARE_TRACER(svd_swork); DECLARE_TRACER(svd_vectors); void start_engine(); struct vertex_data { /** \brief The number of times this vertex has been updated. 
*/ vec pvec; double A_ii; /** * \brief Simple default constructor which randomizes the vertex * data */ vertex_data() : A_ii(0) { randomize(); } /** \brief Randomizes the latent pvec */ void randomize() { pvec.resize(data_size); pvec.setRandom(); } /** \brief Save the vertex data to a binary archive */ void save(graphlab::oarchive& arc) const { arc << pvec << A_ii; } /** \brief Load the vertex data from a binary archive */ void load(graphlab::iarchive& arc) { arc >> pvec >> A_ii; } }; // end of vertex data class gather_type { public: vec pvec; double training_rmse; double validation_rmse; gather_type() { training_rmse = validation_rmse = 0; } void save(graphlab::oarchive& arc) const { arc << pvec << training_rmse << validation_rmse; } void load(graphlab::iarchive& arc) { arc >> pvec >> training_rmse >> validation_rmse; } gather_type& operator+=(const gather_type& other) { pvec += other.pvec; training_rmse += other.training_rmse; validation_rmse += other.validation_rmse; return *this; } }; gather_type ret; /** * \brief The edge data stores the entry in the matrix. * * In addition the edge data svdo stores the most recent error estimate. */ struct edge_data : public graphlab::IS_POD_TYPE { /** * \brief The type of data on the edge; * * \li *Train:* the observed value is correct and used in training * \li *Validate:* the observed value is correct but not used in training * \li *Predict:* The observed value is not correct and should not be * used in training. */ enum data_role_type { TRAIN, VALIDATE, PREDICT }; /** \brief the observed value for the edge */ double obs; /** \brief The train/validation/test designation of the edge */ data_role_type role; /** \brief basic initialization */ edge_data(double obs = 0, data_role_type role = PREDICT) : obs(obs), role(role) { } }; // end of edge data /** * \brief The graph type is defined in terms of the vertex and edge * data. 
*/ typedef graphlab::distributed_graph graph_type; graph_type * pgraph; /** * \brief Given a vertex and an edge return the other vertex in the * edge. */ inline graph_type::vertex_type get_other_vertex(graph_type::edge_type& edge, const graph_type::vertex_type& vertex) { return vertex.id() == edge.source().id()? edge.target() : edge.source(); }; // end of get_other_vertex //typedef double gather_type; typedef double message_type; #include "math.hpp" //uses vertex_data and edge_data so has to be included here #include "printouts.hpp" // the same typedef graphlab::omni_engine engine_type; engine_type * pengine = NULL; /** * \brief The prediction saver is used by the graph.save routine to * output the final predictions back to the filesystem. */ struct prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { return ""; //nop } std::string save_edge(const edge_type& edge) const { if(edge.data().role == edge_data::PREDICT) { std::stringstream strm; Eigen::DiagonalMatrix diagonal_matrix(nconv); diagonal_matrix.diagonal() = singular_values; const double prediction = edge.source().data().pvec.head(nconv).transpose() * diagonal_matrix * edge.target().data().pvec.head(nconv); strm << (edge.source().id()+input_file_offset) << '\t'; strm << (edge.target().id()-rows+input_file_offset) << '\t'; strm << std::setprecision(8) < where k is the number of converged singular values */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.id() < rows){ std::string ret = boost::lexical_cast(vertex.id()+input_file_offset) + " "; for (uint i=0; i< nconv; i++) ret += boost::lexical_cast(vertex.data().pvec[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; struct linear_model_saver_V { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, 
using the format: nodeid factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.id() >= rows){ std::string ret = boost::lexical_cast(vertex.id()-rows+input_file_offset) + " "; for (uint i=0; i< nconv; i++) ret += boost::lexical_cast(vertex.data().pvec[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; /** * \brief The graph loader function is a line parser used for * distributed graph construction. */ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { //no need to parse if (filename == vecfile) return true; if (boost::ends_with(filename,"singular_values") || boost::ends_with(filename, "_v0")) return true; if (line.find("#") != std::string::npos) return true; // Determine the role of the data edge_data::data_role_type role = edge_data::TRAIN; if (boost::ends_with(filename,".predict")) role = edge_data::PREDICT; // Parse the line std::stringstream strm(line); graph_type::vertex_id_type source_id(-1), target_id(-1); float obs = 1; strm >> source_id >> target_id; if (source_id == graph_type::vertex_id_type(-1) || target_id == graph_type::vertex_id_type(-1)){ logstream(LOG_WARNING)<<"Failed to read input line: "<< line << " in file: " << filename << " (or node id is -1). 
" << std::endl; return true; } if (input_file_offset != 0){ source_id-=input_file_offset; target_id-=input_file_offset; } //if (source_id >= (uint)rows) if (source_id > (uint)rows) logstream(LOG_FATAL)<<"Problem at input line: [ " << line << " ] row id ( = " << source_id+input_file_offset << " ) should be <= than matrix rows (= " << rows << " ) " << std::endl; //if (target_id >= (uint)cols) if (target_id > (uint)cols) logstream(LOG_FATAL)<<"Problem at input line: [ " << line << " ] col id ( = " << target_id+input_file_offset << " ) should be <= than matrix cols (= " << cols << " ) " << std::endl; if (!binary) strm >> obs; if (!info.is_square()) target_id = rows + target_id; if (source_id == target_id){ vertex_data data; data.A_ii = obs; graph.add_vertex(source_id, data); } // Create an edge and add it to the graph else graph.add_edge(source_id, target_id, edge_data(obs, role)); return true; // successful load } // end of graph_loader void init_lanczos(graph_type * g, bipartite_graph_descriptor & info){ if (g->num_vertices() == 0) logstream(LOG_FATAL)<<"Failed to load graph. Aborting" << std::endl; data_size = nsv + nv+1 + max_iter; actual_vector_len = data_size; if (info.is_square()) actual_vector_len = 2*data_size; //assert(pengine); assert(actual_vector_len > 0); pgraph->transform_vertices(init_lanczos_mapr); logstream(LOG_INFO)<<"Allocated a total of: " << ((double)actual_vector_len * g->num_vertices() * sizeof(double)/ 1e6) << " MB for storing vectors." 
<< std::endl; } void swork_vec(graph_type::vertex_type & vertex){ vertex.data().pvec[nconv+kk] = 0; for (int ttt=nconv; ttt < nconv+n; ttt++){ vertex.data().pvec[nconv+kk] += curvec(ttt-nconv)*vertex.data().pvec[ttt]; } } void compute_ritz(graph_type::vertex_type & vertex){ if (!info.is_square()) assert(vertex.id() - pcurrent->start >= 0); assert(nconv + n < vertex.data().pvec.size()); assert(pcurrent->offset >= 0 && pcurrent->offset < vertex.data().pvec.size()); int offset = pcurrent->offset + nconv; assert(offset + n < actual_vector_len); if (info.is_square() && !v_vector) offset += data_size; vec tmp = init_vec(&vertex.data().pvec[offset], n); tmp = tmp.transpose() * (v_vector ? PT : a); memcpy(&vertex.data().pvec[offset] ,&tmp[0], kk*sizeof(double)); if (debug) std::cout<<"Entered ritz with " << offset << " , v_vector: " << v_vector << "data_size: " << data_size << " kk: " << kk << std::endl; } void lanczos(bipartite_graph_descriptor & info, timer & mytimer, vec & errest, const std::string & vecfile){ int its = 1; DistMat A(info); DistSlicedMat U(info.is_square() ? data_size : 0, info.is_square() ? 
2*data_size : data_size, true, info, "U"); DistSlicedMat V(0, data_size, false, info, "V"); vec alpha, beta, b; vec sigma = zeros(data_size); errest = zeros(nv); DistVec v_0(info, 0, false, "v_0"); if (vecfile.size() == 0) v_0 = randu(size(A,2)); PRINT_VEC2("svd->V", v_0); PRINT_VEC(V[0]); DistDouble vnorm = norm(v_0); v_0=v_0/vnorm; PRINT_INT(nv); while(nconv < nsv && its < max_iter){ logstream(LOG_EMPH)<<"Starting iteration: " << its << " at time: " << mytimer.current_time() << std::endl; int k = nconv; n = nv; PRINT_INT(k); PRINT_INT(n); alpha = zeros(n); beta = zeros(n); U[k] = V[k]*A._transpose(); PRINT_VEC(U[k]); orthogonalize_vs_all(U, k, alpha(0)); PRINT_VEC(U[k]); PRINT_VEC3("alpha", alpha, 0); for (int i=k+1; inconv", nconv); n = nv - nconv; PRINT_INT(n); alpha.conservativeResize(n); beta.conservativeResize(n); PRINT_MAT2("Q",eye(n)); PRINT_MAT2("PT",eye(n)); PRINT_VEC2("alpha",alpha); PRINT_VEC2("beta",beta); mat T=diag(alpha); for (int i=0; isigma[i]", sigma(i)); PRINT_NAMED_DBL("Q[j*n+n-1]",a(n-1,j)); PRINT_NAMED_DBL("beta[n-1]",beta(n-1)); errest(i) = abs(a(n-1,j)*beta(n-1)); PRINT_NAMED_DBL("svd->errest[i]", errest(i)); if (alpha(j) > tol){ errest(i) = errest(i) / alpha(j); PRINT_NAMED_DBL("svd->errest[i]", errest(i)); } if (errest(i) < tol){ kk = kk+1; PRINT_NAMED_INT("k",kk); } if (nconv +kk >= nsv){ printf("set status to tol\n"); finished = true; } }//end for PRINT_NAMED_INT("k",kk); END_TRACEPOINT(svd_error_estimate) //vec v; if (!finished){ BEGIN_TRACEPOINT(svd_swork); curvec=get_col(PT,kk); DistVec v = V[nconv]; pcurrent = &v; graphlab::vertex_set nodes = pgraph->select(select_in_range); pgraph->transform_vertices(swork_vec, nodes); PRINT_MAT2("swork", curvec); PRINT_VEC2("svd->V",V[nconv]); //PRINT_VEC2("v[0]",v); END_TRACEPOINT(svd_swork); } //compute the ritz eigenvectors of the converged singular triplets DistVec v = V[nconv]; if (kk > 0){ PRINT_VEC2("svd->V", V[nconv]); BEGIN_TRACEPOINT(matproduct); v = V[nconv]; pcurrent = &v; v_vector = 
true; graphlab::vertex_set nodes = pgraph->select(select_in_range); pgraph->transform_vertices(compute_ritz, nodes); PRINT_VEC2("svd->V", V[nconv]); v = U[nconv]; pcurrent = &v; v_vector = false; PRINT_VEC2("svd->U", U[nconv]); nodes = pgraph->select(select_in_range); pgraph->transform_vertices(compute_ritz, nodes); END_TRACEPOINT(matproduct); PRINT_VEC2("svd->U", U[nconv]); } nconv=nconv+kk; if (finished) break; V[nconv]=v; PRINT_VEC2("svd->V", V[nconv]); PRINT_NAMED_INT("svd->nconv", nconv); its++; PRINT_NAMED_INT("svd->its", its); PRINT_NAMED_INT("svd->nconv", nconv); PRINT_NAMED_INT("nv",nv); } // end(while) printf(" Number of computed signular values %d",nconv); printf("\n"); DistVec normret(info, nconv, false, "normret"); DistVec normret_tranpose(info, nconv, true, "normret_tranpose"); INITIALIZE_TRACER(svd_error2, "svd error2"); BEGIN_TRACEPOINT(svd_error2); for (int i=0; i < std::min(nsv,nconv); i++){ normret = V[i]*A._transpose() -U[i]*sigma(i); double n1 = norm(normret).toDouble(); PRINT_DBL(n1); normret_tranpose = U[i]*A -V[i]*sigma(i); double n2 = norm(normret_tranpose).toDouble(); PRINT_DBL(n2); double err=sqrt(n1*n1+n2*n2); PRINT_DBL(err); PRINT_DBL(tol); if (sigma(i)>tol){ err = err/sigma(i); } PRINT_DBL(err); PRINT_DBL(sigma(i)); printf("Singular value %d \t%13.6g\tError estimate: %13.6g\n", i, sigma(i),err); } END_TRACEPOINT(svd_error2); if (save_vectors){ if (nconv == 0) logstream(LOG_FATAL)<<"No converged vectors. 
Aborting the save operation" << std::endl; if (predictions == "") logstream(LOG_FATAL)<<"Please specify prediction output fie name using the --predictions=filename command"<save(predictions + ".U", linear_model_saver_U(), gzip_output, save_edges, save_vertices, threads_per_machine); pgraph->save(predictions + ".V", linear_model_saver_V(), gzip_output, save_edges, save_vertices, threads_per_machine); END_TRACEPOINT(svd_vectors); } sigma.conservativeResize(nconv); singular_values = sigma; if(!predictions.empty()) { std::cout << "Saving predictions" << std::endl; const bool gzip_output = false; const bool save_vertices = false; const bool save_edges = true; const size_t threads_per_machine = 1; //save the predictions pgraph->save(predictions, prediction_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); } } void start_engine(){ vertex_set nodes = pgraph->select(selected_node); pengine->signal_vset(nodes); pengine->start(); } void write_output_vector(const std::string datafile, const vec & output, bool issparse, std::string comment) { FILE * f = fopen(datafile.c_str(),"w"); if (f == NULL) logstream(LOG_FATAL)<<"Failed to open file: " << datafile << " for writing. 
" << std::endl; if (comment.size() > 0) // add a comment to the matrix market header fprintf(f, "%c%s\n", '%', comment.c_str()); for (int j=0; j<(int)output.size(); j++){ fprintf(f, "%10.13g\n", output[j]); } fclose(f); } int main(int argc, char** argv) { global_logger().set_log_to_console(true); INITIALIZE_TRACER(svd_bidiagonal, "svd bidiagonal"); INITIALIZE_TRACER(svd_error_estimate, "svd error estimate"); INITIALIZE_TRACER(svd_swork, "Svd swork"); INITIALIZE_TRACER(matproduct, "computing ritz eigenvectors"); INITIALIZE_TRACER(svd_bidiagonal, "svd bidiagonal"); INITIALIZE_TRACER(svd_vectors, "svd vectors"); // Parse command line options ----------------------------------------------- const std::string description = "Compute the gklanczos factorization of a matrix."; graphlab::command_line_options clopts(description); std::string input_dir, output_dir; std::string exec_type = "synchronous"; clopts.attach_option("matrix", input_dir, "The directory containing the matrix file"); clopts.add_positional("matrix"); clopts.attach_option("initial_vector", vecfile,"optional initial vector"); clopts.attach_option("debug", debug, "Display debug output."); clopts.attach_option("unittest", unittest, "unit testing 0=None, 1=3x3 matrix, 2=10x10 matrix, 3 = 100x100 matrix"); clopts.attach_option("max_iter", max_iter, "max iterations"); clopts.attach_option("ortho_repeats", ortho_repeats, "orthogonalization iterations. 
1 = low accuracy but fast, 2 = medium accuracy, 3 = high accuracy but slow."); clopts.attach_option("nv", nv, "Number of vectors in each iteration"); clopts.attach_option("nsv", nsv, "Number of requested singular values to comptue"); clopts.attach_option("tol", tol, "convergence threshold"); clopts.attach_option("save_vectors", save_vectors, "save output matrices U and V."); clopts.attach_option("rows", rows, "number of rows"); clopts.attach_option("cols", cols, "number of cols"); clopts.attach_option("quiet", quiet, "quiet mode (less verbose)"); clopts.attach_option("predictions", predictions, "predictions file prefix"); clopts.attach_option("binary", binary, "If true, all edges are weighted as one"); clopts.attach_option("input_file_offset", input_file_offset, "input file node id offset (default 0)"); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." << std::endl; clopts.print_description(); return EXIT_FAILURE; } if (quiet){ global_logger().set_log_level(LOG_ERROR); debug = false; } if (unittest == 1){ datafile = "gklanczos_testA/"; vecfile = "gklanczos_testA_v0"; nsv = 3; nv = 3; rows = 3; cols = 4; debug = true; input_file_offset = 1; } else if (unittest == 2){ datafile = "gklanczos_testB/"; vecfile = "gklanczos_testB_v0"; nsv = 10; nv = 10; rows = 10; cols = 10; debug = true; max_iter = 100; input_file_offset = 1; } else if (unittest == 3){ datafile = "gklanczos_testC/"; vecfile = "gklanczos_testC_v0"; nsv = 4; nv = 10; rows = 25; cols = 25; debug = true; max_iter = 100; input_file_offset = 1; } else if (unittest == 4){ datafile= "A2/"; vecfile = "A2/A2_v0"; nsv=3; nv = 4; rows=3; cols = 4; debug=true; max_iter=3; input_file_offset = 1; } if (rows <= 0 || cols <= 0) logstream(LOG_FATAL)<<"Please specify number of rows/cols of the input matrix" << std::endl; if (rows == cols){ logstream(LOG_WARNING)<<"GraphLab SVD does not support square matrices. Increasing row size by one." 
<< std::endl; rows++; } info.rows = rows; info.cols = cols; if (nv < nsv){ logstream(LOG_FATAL)<<"Please set the number of vectors --nv=XX, to be at least the number of support vectors --nsv=XX or larger" << std::endl; } graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); pgraph = &graph; dc.cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. Finished in " << timer.current_time() << std::endl; if (!graph.num_edges() || !graph.num_vertices()) logstream(LOG_FATAL)<< "Failed to load graph. Check your input path: " << input_dir << std::endl; dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; dc.cout() << "Creating engine" << std::endl; engine_type engine(dc, graph, exec_type, clopts); pengine = &engine; dc.cout() << "Running SVD (gklanczos)" << std::endl; dc.cout() << "(C) Code by Danny Bickson, CMU " << std::endl; dc.cout() << "Please send bug reports to danny.bickson@gmail.com" << std::endl; timer.start(); init_lanczos(&graph, info); init_math(&graph, info, 
ortho_repeats, update_function); if (vecfile.size() > 0){ std::cout << "Load inital vector from file" << vecfile << std::endl; FILE * file = fopen((vecfile).c_str(), "r"); if (file == NULL) logstream(LOG_FATAL)<<"Failed to open initial vector"<< std::endl; vec input = vec::Zero(rows); double val = 0; for (int i=0; i< rows; i++){ int rc = fscanf(file, "%lg\n", &val); if (rc != 1) logstream(LOG_FATAL)<<"Failed to read initial vector (on line: "<< i << " ) " << std::endl; input[i] = val; } fclose(file); DistVec v0(info, 0, false, "v0"); v0 = input; } vec errest; lanczos( info, timer, errest, vecfile); if (graphlab::mpi_tools::rank()==0) write_output_vector(predictions + ".singular_values", singular_values, false, "%GraphLab SVD Solver library. This file contains the singular values."); const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime << std::endl << "Updates executed: " << engine.num_updates() << std::endl << "Update Rate (updates/second): " << engine.num_updates() / runtime << std::endl; // Compute the final training error ----------------------------------------- if (unittest == 1){ assert(errest.size() == 3); for (int i=0; i< errest.size(); i++) assert(errest[i] < 1e-30); } else if (unittest == 2){ assert(errest.size() == 10); for (int i=0; i< errest.size(); i++) assert(errest[i] < 1e-15); } else if (unittest == 4){ assert(pow(singular_values[0]- 2.16097, 2) < 1e-8); assert(pow(singular_values[2]- 0.554159, 2) < 1e-8); } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/collaborative_filtering/svdpp.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * \file * * \brief The main file for the BIAS-SGD matrix factorization algorithm. * * This file contains the main body of the BIAS-SGD matrix factorization * algorithm. */ #include #include #include "eigen_serialization.hpp" #include #include typedef Eigen::VectorXd vec_type; typedef Eigen::MatrixXd mat_type; //when using negative node id range, we are not allowed to use //0 and 1 so we add 2. const static int SAFE_NEG_OFFSET=2; static bool debug; int iter = 0; float itmBiasStep = 1e-4; float itmBiasReg = 1e-4; float usrBiasStep = 1e-4; float usrBiasReg = 1e-4; float usrFctrStep = 1e-4; float usrFctrReg = 1e-4; float itmFctrStep = 1e-4; float itmFctrReg = 1e-4; //gamma7 float itmFctr2Step = 1e-4; float itmFctr2Reg = 1e-4; /** * \ingroup toolkit_matrix_pvecization * * \brief the vertex data type which contains the latent pvec. * * Each row and each column in the matrix corresponds to a different * vertex in the BIASSGD graph. Associated with each vertex is a pvec * (vector) of latent parameters that represent that vertex. The goal * of the BIASSGD algorithm is to find the values for these latent * parameters such that the non-zero entries in the matrix can be * predicted by taking the dot product of the row and column pvecs. */ struct vertex_data { /** * \brief A shared "constant" that specifies the number of latent * values to use. */ static size_t NLATENT; /** \brief The number of times this vertex has been updated. 
*/ uint32_t nupdates; /** \brief The latent pvec for this vertex */ vec_type pvec; vec_type weight; double bias; /** * \brief Simple default constructor which randomizes the vertex * data */ vertex_data() : nupdates(0) { randomize(); } /** \brief Randomizes the latent pvec */ void randomize() { pvec.resize(NLATENT); pvec.setRandom(); weight.resize(NLATENT); weight.setRandom(); } /** \brief Save the vertex data to a binary archive */ void save(graphlab::oarchive& arc) const { arc << nupdates << pvec << weight << bias; } /** \brief Load the vertex data from a binary archive */ void load(graphlab::iarchive& arc) { arc >> nupdates >> pvec >> weight >> bias; } }; // end of vertex data /** * \brief The edge data stores the entry in the matrix. * * In addition the edge data svdppo stores the most recent error estimate. */ struct edge_data : public graphlab::IS_POD_TYPE { /** * \brief The type of data on the edge; * * \li *Train:* the observed value is correct and used in training * \li *Validate:* the observed value is correct but not used in training * \li *Predict:* The observed value is not correct and should not be * used in training. */ enum data_role_type { TRAIN, VALIDATE, PREDICT }; /** \brief the observed value for the edge */ float obs; /** \brief The train/validation/test designation of the edge */ data_role_type role; /** \brief basic initialization */ edge_data(float obs = 0, data_role_type role = PREDICT) : obs(obs), role(role) { } }; // end of edge data /** * \brief The graph type is defined in terms of the vertex and edge * data. */ typedef graphlab::distributed_graph graph_type; #include "implicit.hpp" double extract_l2_error(const graph_type::edge_type & edge); /** * \brief Given a vertex and an edge return the other vertex in the * edge. */ inline graph_type::vertex_type get_other_vertex(graph_type::edge_type& edge, const graph_type::vertex_type& vertex) { return vertex.id() == edge.source().id()? 
edge.target() : edge.source(); }; // end of get_other_vertex /** * \brief The gather type used to construct XtX and Xty needed for the BIASSGD * update * * To compute the ALS update we need to compute the sum of * \code * sum: XtX = nbr.pvec.transpose() * nbr.pvec * sum: Xy = nbr.pvec * edge.obs * \endcode * For each of the neighbors of a vertex. * * To do this in the Gather-Apply-Scatter model the gather function * computes and returns a pair consisting of XtX and Xy which are then * added. The gather type represents that tuple and provides the * necessary gather_type::operator+= operation. * */ class gather_type { public: /** * \brief Stores the current sum of nbr.pvec.transpose() * * nbr.pvec */ /** * \brief Stores the current sum of nbr.pvec * edge.obs */ vec_type pvec; vec_type weight; double bias; /** \brief basic default constructor */ gather_type() { } /** * \brief This constructor computes XtX and Xy and stores the result * in XtX and Xy */ gather_type(const vec_type& X, const vec_type & _weight, double _bias) { pvec = X; bias = _bias; weight = _weight; } // end of constructor for gather type /** \brief Save the values to a binary archive */ void save(graphlab::oarchive& arc) const { arc << pvec << bias << weight; } /** \brief Read the values from a binary archive */ void load(graphlab::iarchive& arc) { arc >> pvec >> bias >> weight; } /** * \brief Computes XtX += other.XtX and Xy += other.Xy updating this * tuples value */ gather_type& operator+=(const gather_type& other) { if (pvec.size() == 0){ pvec = other.pvec; bias = other.bias; weight = other.weight; return *this; } else if (other.pvec.size() == 0) return *this; pvec += other.pvec; bias += other.bias; weight += other.weight; return *this; } // end of operator+= }; // end of gather type //typedef gather_type message_type; enum{ PHASE1 = 0, PHASE2 = 1 }; /** * BIASSGD vertex program type */ class svdpp_vertex_program : public graphlab::ivertex_program { public: /** The convergence tolerance */ static 
double TOLERANCE; static double LAMBDA; static double GAMMA; static double MAXVAL; static double MINVAL; static double STEP_DEC; static bool debug; static size_t MAX_UPDATES; static double GLOBAL_MEAN; static size_t NUM_TRAINING_EDGES; static uint USERS; gather_type pmsg; void save(graphlab::oarchive& arc) const { arc << pmsg; } /** \brief Load the vertex data from a binary archive */ void load(graphlab::iarchive& arc) { arc >> pmsg; } /** The set of edges to gather along */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { vec_type step = vec_type::Zero(vertex_data::NLATENT); double bias =0, other_bias = 0; vec_type delta, other_delta; //user node if (vertex.num_in_edges() == 0){ vertex_type other_vertex(get_other_vertex(edge, vertex)); vertex_type my_vertex(vertex); int phase = my_vertex.data().nupdates % 2; if (phase == PHASE1){ //my_vertex.data().weight += movie.weight; context.signal(other_vertex, gather_type(vec_type::Zero(vertex_data::NLATENT), vec_type::Zero(vertex_data::NLATENT), 0)); return gather_type(vec_type::Zero(vertex_data::NLATENT), other_vertex.data().weight, 0); } else if (phase == PHASE2){ //vertex_data & my_data = my_vertex.data(); double pred = svdpp_vertex_program::GLOBAL_MEAN + my_vertex.data().bias + other_vertex.data().bias + my_vertex.data().pvec.dot(other_vertex.data().pvec+other_vertex.data().weight); pred = std::min(pred, svdpp_vertex_program::MAXVAL); pred = std::max(pred, svdpp_vertex_program::MINVAL); const float err = edge.data().obs - pred; if (debug) std::cout<<"entering edge " << (int)edge.source().id() << ":" << (int)edge.target().id() << " err: " << err << " rmse: " << err*err < 0){ if (phase == PHASE1){ pmsg = msg; } else if (phase == PHASE2){ pmsg = msg; } } } /** apply collects the sum of XtX and Xy */ void apply(icontext_type& context, 
vertex_type& vertex, const gather_type& sum) { vertex_data& vdata = vertex.data(); int phase = vdata.nupdates %2; if (phase == PHASE1){ //user node receives the sum of movie weights if (vertex.num_out_edges() > 0){ vertex.data().weight = sum.weight; float usrNorm = double(1.0/sqrt(vertex.num_out_edges())); vertex.data().weight *= usrNorm; } //movie node doe nothing else {} } else if (phase == PHASE2){ //user node update gradients and bias if (vertex.num_in_edges() == 0){ vdata.pvec += sum.pvec; vdata.bias += sum.bias; //does not update weight here (since was done in phase1) } //movie node else { vdata.weight += pmsg.weight; //step vdata.pvec += pmsg.pvec; vdata.bias += pmsg.bias; } } ++vdata.nupdates; } // end of apply /** The edges to scatter along */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of scatter edges /** Scatter reschedules neighbors */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { edge_data& edata = edge.data(); if(edata.role == edge_data::TRAIN) { const vertex_type other_vertex = get_other_vertex(edge, vertex); // Reschedule neighbors ------------------------------------------------ if(other_vertex.data().nupdates < MAX_UPDATES) context.signal(other_vertex, gather_type(vec_type::Zero(vertex_data::NLATENT),vec_type::Zero(vertex_data::NLATENT),0)); } } // end of scatter function /** * \brief Signal all vertices on one side of the bipartite graph */ static graphlab::empty signal_left(icontext_type& context, vertex_type& vertex) { if(vertex.num_out_edges() > 0) context.signal(vertex, gather_type(vec_type::Zero(vertex_data::NLATENT),vec_type::Zero(vertex_data::NLATENT),0)); return graphlab::empty(); } // end of signal_left }; // end of svdpp vertex program struct error_aggregator : public graphlab::IS_POD_TYPE { typedef svdpp_vertex_program::icontext_type icontext_type; typedef graph_type::edge_type edge_type; double train_error, 
validation_error; size_t ntrain, nvalidation; error_aggregator() : train_error(0), validation_error(0), ntrain(0), nvalidation(0) { } error_aggregator& operator+=(const error_aggregator& other) { train_error += other.train_error; assert(!std::isnan(train_error)); validation_error += other.validation_error; ntrain += other.ntrain; nvalidation += other.nvalidation; return *this; } static error_aggregator map(icontext_type& context, const graph_type::edge_type& edge) { error_aggregator agg; if (edge.data().role == edge_data::TRAIN){ agg.train_error = extract_l2_error(edge); agg.ntrain = 1; assert(!std::isnan(agg.train_error)); } else if (edge.data().role == edge_data::VALIDATE){ agg.validation_error = extract_l2_error(edge); agg.nvalidation = 1; } return agg; } static void finalize(icontext_type& context, const error_aggregator& agg) { iter++; if (iter%2 == 0) return; ASSERT_GT(agg.ntrain, 0); const double train_error = std::sqrt(agg.train_error / agg.ntrain); assert(!std::isnan(train_error)); context.cout() << std::setw(8) << context.elapsed_seconds() << std::setw(8) << train_error; if(agg.nvalidation > 0) { const double validation_error = std::sqrt(agg.validation_error / agg.nvalidation); context.cout() << std::setw(8) << validation_error; } context.cout() << std::endl; usrBiasStep *= svdpp_vertex_program::STEP_DEC; itmBiasStep *= svdpp_vertex_program::STEP_DEC; usrFctrStep *= svdpp_vertex_program::STEP_DEC; itmFctrStep *= svdpp_vertex_program::STEP_DEC; itmFctr2Step *= svdpp_vertex_program::STEP_DEC; } }; // end of error aggregator /** * \brief Given an edge compute the error associated with that edge */ double extract_l2_error(const graph_type::edge_type & edge) { double pred = svdpp_vertex_program::GLOBAL_MEAN + edge.source().data().bias + edge.target().data().bias + edge.source().data().pvec.dot(edge.target().data().pvec); pred = std::min(svdpp_vertex_program::MAXVAL, pred); pred = std::max(svdpp_vertex_program::MINVAL, pred); double rmse = (edge.data().obs - 
pred) * (edge.data().obs - pred); assert(rmse <= pow(svdpp_vertex_program::MAXVAL-svdpp_vertex_program::MINVAL,2)); return rmse; } // end of extract_l2_error struct prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { return ""; //nop } std::string save_edge(const edge_type& edge) const { if (edge.data().role != edge_data::PREDICT) return ""; std::stringstream strm; double pred = svdpp_vertex_program::GLOBAL_MEAN + edge.target().data().bias + edge.source().data().bias + edge.source().data().pvec.dot(edge.target().data().pvec+edge.target().data().weight); pred = std::min(pred, svdpp_vertex_program::MAXVAL); pred = std::max(pred, svdpp_vertex_program::MINVAL); strm << edge.source().id() << '\t' << -edge.target().id()-SAFE_NEG_OFFSET << '\t' << pred << '\n'; return strm.str(); } }; // end of prediction_saver struct linear_model_saver_U { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() > 0){ std::string ret = boost::lexical_cast(vertex.id()) + " "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().pvec[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; struct linear_model_saver_V { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... 
factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() == 0){ std::string ret = boost::lexical_cast(-vertex.id()-SAFE_NEG_OFFSET) + " "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().pvec[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; struct linear_model_saver_bias_U { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() > 0){ std::string ret = boost::lexical_cast(vertex.id()) + " "; ret += boost::lexical_cast(vertex.data().bias) + "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; struct linear_model_saver_bias_V { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() == 0){ std::string ret = boost::lexical_cast(-vertex.id()-SAFE_NEG_OFFSET) + " "; ret += boost::lexical_cast(vertex.data().bias) + "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; /** * \brief The graph loader function is a line parser used for * distributed graph construction. 
*/ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { // Parse the line std::stringstream strm(line); graph_type::vertex_id_type source_id(-1), target_id(-1); float obs(0); strm >> source_id >> target_id; if (source_id == graph_type::vertex_id_type(-1) || target_id == graph_type::vertex_id_type(-1)){ logstream(LOG_WARNING)<<"Failed to read input line: "<< line << " in file: " << filename << " (or node id is -1). " << std::endl; return true; } // Determine the role of the data edge_data::data_role_type role = edge_data::TRAIN; if(boost::ends_with(filename,".validate")) role = edge_data::VALIDATE; else if(boost::ends_with(filename, ".predict")) role = edge_data::PREDICT; if(role == edge_data::TRAIN || role == edge_data::VALIDATE){ strm >> obs; if (obs < svdpp_vertex_program::MINVAL || obs > svdpp_vertex_program::MAXVAL){ logstream(LOG_WARNING)<<"Rating values should be between " << svdpp_vertex_program::MINVAL << " and " << svdpp_vertex_program::MAXVAL << ". 
Got value: " << obs << " [ user: " << source_id << " to item: " < engine_type; double calc_global_mean(const graph_type::edge_type & edge){ if (edge.data().role == edge_data::TRAIN) return edge.data().obs; else return 0; } size_t count_edges(const graph_type::edge_type & edge){ if (edge.data().role == edge_data::TRAIN) return 1; else return 0; } int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "Compute the ALS factorization of a matrix."; graphlab::command_line_options clopts(description); std::string input_dir, output_dir; std::string predictions; size_t interval = 0; std::string exec_type = "synchronous"; clopts.attach_option("matrix", input_dir, "The directory containing the matrix file"); clopts.add_positional("matrix"); clopts.attach_option("D", vertex_data::NLATENT, "Number of latent parameters to use."); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); clopts.attach_option("max_iter", svdpp_vertex_program::MAX_UPDATES, "The maxumum number of udpates allowed for a vertex"); clopts.attach_option("lambda", svdpp_vertex_program::LAMBDA, "SGD regularization weight"); clopts.attach_option("gamma", svdpp_vertex_program::GAMMA, "SGD step size"); clopts.attach_option("debug", svdpp_vertex_program::debug, "debug - additional verbose info"); clopts.attach_option("tol", svdpp_vertex_program::TOLERANCE, "residual termination threshold"); clopts.attach_option("maxval", svdpp_vertex_program::MAXVAL, "max allowed value"); clopts.attach_option("minval", svdpp_vertex_program::MINVAL, "min allowed value"); clopts.attach_option("step_dec", svdpp_vertex_program::STEP_DEC, "multiplicative step decrement"); clopts.attach_option("user_bias_step", usrBiasStep, "user_bias_step"); clopts.attach_option("user_bias_reg", usrBiasReg, "user_bias_reg"); 
clopts.attach_option("item_bias_step",itmBiasStep, "item_bias_step"); clopts.attach_option("item_bias_reg", itmBiasReg, "item_bias_reg"); clopts.attach_option("user_factor_step", usrFctrStep, "user_factor_step"); clopts.attach_option("user_factor_reg", usrFctrReg, "user_factor_reg"); clopts.attach_option("item_factor_step", itmFctrStep, "item_factor_step"); clopts.attach_option("item_factor_reg", itmFctrReg, "item_factor_reg"); clopts.attach_option("item_factor2_step", itmFctr2Step, "item_factor2_step"); clopts.attach_option("item_factor2_reg", itmFctr2Reg, "item_factor2_reg"); clopts.attach_option("interval", interval, "The time in seconds between error reports"); clopts.attach_option("predictions", predictions, "The prefix (folder and filename) to save predictions."); clopts.attach_option("output", output_dir, "Output results"); parse_implicit_command_line(clopts); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." << std::endl; clopts.print_description(); return EXIT_FAILURE; } debug = svdpp_vertex_program::debug; // omp_set_num_threads(clopts.get_ncpus()); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); dc.cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; if (dc.procid() == 0) add_implicit_edges(implicitratingtype, graph, dc); dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. Finished in " << timer.current_time() << std::endl; if (!graph.num_edges() || !graph.num_vertices()) logstream(LOG_FATAL)<< "Failed to load graph. 
Check your input path: " << input_dir << std::endl; dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; dc.cout() << "Creating engine" << std::endl; engine_type engine(dc, graph, exec_type, clopts); // Add error reporting to the engine const bool success = engine.add_edge_aggregator ("error", error_aggregator::map, error_aggregator::finalize) && engine.aggregate_periodic("error", interval); ASSERT_TRUE(success); svdpp_vertex_program::GLOBAL_MEAN = graph.map_reduce_edges(calc_global_mean); svdpp_vertex_program::NUM_TRAINING_EDGES = graph.map_reduce_edges(count_edges); svdpp_vertex_program::GLOBAL_MEAN /= svdpp_vertex_program::NUM_TRAINING_EDGES; dc.cout() << "Global mean is: " <(svdpp_vertex_program::signal_left); // Run the PageRank --------------------------------------------------------- dc.cout() << "Running SVD++" << std::endl; dc.cout() << "(C) Code by Danny Bickson, CMU " << std::endl; dc.cout() << "Please send bug reports to danny.bickson@gmail.com" << std::endl; dc.cout() << "Time Training Validation" < #include #include #include #include #include // This file defines the serialization code for the eigen types. 
#include "eigen_serialization.hpp"
// NOTE(review): the <...> include targets below were lost during text
// extraction; the headers are reconstructed guesses -- verify against
// the repository before building.
#include <graphlab.hpp>
#include <Eigen/Dense>
#include "stats.hpp"
#include <graphlab/macros_def.hpp>

// Items live in the negative vertex-id range; add 2 so the remapped ids
// never become -0 or -1, which are not allowed.
const int SAFE_NEG_OFFSET = 2;

/**
 * \brief We use the eigen library's vector type to represent
 * mathematical vectors.
 */
typedef Eigen::VectorXd vec_type;

/**
 * \brief We use the eigen library's matrix type to represent
 * matrices.
 */
typedef Eigen::MatrixXd mat_type;

/**
 * \brief Remap the target id of each edge into a different id space
 * than the source id.
 */
bool REMAP_TARGET = true;

/**
 * \ingroup toolkit_matrix_factorization
 *
 * \brief the vertex data type which contains the latent factor.
 *
 * Each row and each column in the matrix corresponds to a different
 * vertex in the ALS graph. Associated with each vertex is a factor
 * (vector) of latent parameters that represent that vertex. The goal
 * of the ALS algorithm is to find the values for these latent
 * parameters such that the non-zero entries in the matrix can be
 * predicted by taking the dot product of the row and column factors.
 */
struct vertex_data {
  /**
   * \brief A shared "constant" that specifies the number of latent
   * values to use.
   */
  static size_t NLATENT;

  /** \brief The number of times this vertex has been updated. */
  uint32_t nupdates;

  /** \brief The most recent L1 change in the factor value. */
  float residual;

  /** \brief The latent factor for this vertex. */
  vec_type factor;

  /**
   * \brief Simple default constructor which randomizes the vertex
   * data.
   */
  vertex_data() : nupdates(0), residual(1) { randomize(); }

  /** \brief Randomizes the latent factor. */
  void randomize() {
    factor.resize(NLATENT);
    factor.setRandom();
  }

  /** \brief Save the vertex data to a binary archive. */
  void save(graphlab::oarchive& arc) const {
    arc << nupdates << residual << factor;
  }

  /** \brief Load the vertex data from a binary archive (mirrors save()). */
  void load(graphlab::iarchive& arc) {
    arc >> nupdates >> residual >> factor;
  }
}; // end of vertex data

size_t vertex_data::NLATENT = 20;

/**
 * \brief The edge data stores the entry in the matrix.
 *
 * In addition the edge data also stores the most recent error estimate.
 */
struct edge_data : public graphlab::IS_POD_TYPE {
  /**
   * \brief The type of data on the edge;
   *
   * \li *Train:* the observed value is correct and used in training
   * \li *Validate:* the observed value is correct but not used in training
   * \li *Predict:* The observed value is not correct and should not be
   * used in training.
   */
  enum data_role_type { TRAIN, VALIDATE, PREDICT };

  /** \brief the observed value for the edge */
  float obs;

  /** \brief the weight or time of the observation */
  float weight;

  /** \brief The train/validation/test designation of the edge */
  data_role_type role;

  /** \brief basic initialization (defaults to a weight-1 TRAIN edge) */
  edge_data(float obs = 0, data_role_type role = TRAIN, float weight = 1)
      : obs(obs), weight(weight), role(role) { }
}; // end of edge data

/**
 * \brief The graph type is defined in terms of the vertex and edge
 * data.
*/ typedef graphlab::distributed_graph graph_type; #include "implicit.hpp" stats_info count_edges(const graph_type::edge_type & edge){ stats_info ret; if (edge.data().role == edge_data::TRAIN) ret.training_edges = 1; else if (edge.data().role == edge_data::VALIDATE) ret.validation_edges = 1; ret.max_user = (size_t)edge.source().id(); ret.max_item = (size_t)edge.target().id(); return ret; } /** * \brief Given a vertex and an edge return the other vertex in the * edge. */ inline graph_type::vertex_type get_other_vertex(graph_type::edge_type& edge, const graph_type::vertex_type& vertex) { return vertex.id() == edge.source().id()? edge.target() : edge.source(); }; // end of get_other_vertex /** * \brief The gather type used to construct XtX and Xty needed for the ALS * update * * To compute the ALS update we need to compute the sum of * \code * sum: XtX = nbr.factor.transpose() * nbr.factor * sum: Xy = nbr.factor * edge.obs * \endcode * For each of the neighbors of a vertex. * * To do this in the Gather-Apply-Scatter model the gather function * computes and returns a pair consisting of XtX and Xy which are then * added. The gather type represents that tuple and provides the * necessary gather_type::operator+= operation. 
* */ class gather_type { public: /** * \brief Stores the current sum of nbr.factor.transpose() * * nbr.factor */ mat_type XtX; /** * \brief Stores the current sum of nbr.factor * edge.obs */ vec_type Xy; /** * \brief Stores the weight of this edge */ float weight; /** \brief basic default constructor */ gather_type() { } /** * \brief This constructor computes XtX and Xy and stores the result * in XtX and Xy */ gather_type(const vec_type& X, const double y, const float weight) : XtX(X.size(), X.size()), Xy(X.size()) { XtX.triangularView() = X * X.transpose() * weight; Xy = X * y * weight; } // end of constructor for gather type /** \brief Save the values to a binary archive */ void save(graphlab::oarchive& arc) const { arc << XtX << Xy << weight; } /** \brief Read the values from a binary archive */ void load(graphlab::iarchive& arc) { arc >> XtX >> Xy >> weight; } /** * \brief Computes XtX += other.XtX and Xy += other.Xy updating this * tuples value */ gather_type& operator+=(const gather_type& other) { if(other.Xy.size() == 0) { ASSERT_EQ(other.XtX.rows(), 0); ASSERT_EQ(other.XtX.cols(), 0); } else { if(Xy.size() == 0) { ASSERT_EQ(XtX.rows(), 0); ASSERT_EQ(XtX.cols(), 0); XtX = other.XtX; Xy = other.Xy; } else { XtX.triangularView() += other.XtX; Xy += other.Xy; } } return *this; } // end of operator+= }; // end of gather type /** * \brief WALS vertex program implements the alternating least squares * algorithm in the Gather-Apply-Scatter abstraction. 
* * The ALS update treats adjacent vertices (rows or columns) as "X" * (independent) values and the edges (matrix entries) as observed "y" * (dependent) values and then updates the current vertex value as a * weight "w" such that: * * y = X * w + noise * * This is accomplished using the following equation: * * w = inv(X' * X) * (X * y) * * We implement this in the Gather-Apply-Scatter model by: * * 1) Gather: returns the tuple (X' * X, X * y) * Sum: (aX' * aX, aX * ay) + (bX' * bX, bX * by) = * (aX' * aX + bX' * bX, aX * ay + bX * by) * * 2) Apply: Solves inv(X' * X) * (X * y) * * 3) Scatter: schedules the update of adjacent vertices if this * vertex has changed sufficiently and the edge is not well * predicted. * * */ class als_vertex_program : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: /** The convergence tolerance */ static double TOLERANCE; static double LAMBDA; static size_t MAX_UPDATES; static double MAXVAL; static double MINVAL; /** The set of edges to gather along */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges /** The gather function computes XtX and Xy */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { if(edge.data().role == edge_data::TRAIN) { const vertex_type other_vertex = get_other_vertex(edge, vertex); return gather_type(other_vertex.data().factor, edge.data().obs, edge.data().weight); } else return gather_type(); } // end of gather function /** apply collects the sum of XtX and Xy */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& sum) { // Get and reset the vertex data vertex_data& vdata = vertex.data(); // Determine the number of neighbors. 
Each vertex has only in or // out edges depending on which side of the graph it is located if(sum.Xy.size() == 0) { vdata.residual = 0; ++vdata.nupdates; return; } mat_type XtX = sum.XtX; vec_type Xy = sum.Xy; // Add regularization for(int i = 0; i < XtX.rows(); ++i) XtX(i,i) += LAMBDA; // /nneighbors; // Solve the least squares problem using eigen ---------------------------- const vec_type old_factor = vdata.factor; vdata.factor = XtX.selfadjointView().ldlt().solve(Xy); // Compute the residual change in the factor factor ----------------------- vdata.residual = (vdata.factor - old_factor).cwiseAbs().sum() / XtX.rows(); ++vdata.nupdates; } // end of apply /** The edges to scatter along */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of scatter edges /** Scatter reschedules neighbors */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { edge_data& edata = edge.data(); if(edata.role == edge_data::TRAIN) { const vertex_type other_vertex = get_other_vertex(edge, vertex); const vertex_data& vdata = vertex.data(); const vertex_data& other_vdata = other_vertex.data(); const double pred = vdata.factor.dot(other_vdata.factor); const float error = std::fabs(edata.obs - pred); const double priority = (error * vdata.residual); // Reschedule neighbors ------------------------------------------------ if( priority > TOLERANCE && other_vdata.nupdates < MAX_UPDATES) context.signal(other_vertex, priority); } } // end of scatter function /** * \brief Signal all vertices on one side of the bipartite graph */ static graphlab::empty signal_left(icontext_type& context, const vertex_type& vertex) { if(vertex.num_out_edges() > 0) context.signal(vertex); return graphlab::empty(); } // end of signal_left }; // end of als vertex program /** * \brief The graph loader function is a line parser used for * distributed graph construction. 
*/ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { // Parse the line std::stringstream strm(line); graph_type::vertex_id_type source_id(-1), target_id(-1); float obs(0), weight(1); strm >> source_id >> target_id; if (source_id == graph_type::vertex_id_type(-1) || target_id == graph_type::vertex_id_type(-1)){ logstream(LOG_WARNING)<<"Failed to read input line: "<< line << " in file: " << filename << " (or node id is -1). " << std::endl; return true; } // Determine the role of the data edge_data::data_role_type role = edge_data::TRAIN; if(boost::ends_with(filename,".validate")) role = edge_data::VALIDATE; else if(boost::ends_with(filename, ".predict")) role = edge_data::PREDICT; // for test files (.predict) no need to read the actual rating value. if(role == edge_data::TRAIN || role == edge_data::VALIDATE){ strm >> obs >> weight; if (obs < als_vertex_program::MINVAL || obs > als_vertex_program::MAXVAL) logstream(LOG_FATAL)<<"Rating values should be between " << als_vertex_program::MINVAL << " and " << als_vertex_program::MAXVAL << ". Got value: " << obs << " [ user: " << source_id << " to item: " < 0) { const double validation_error = std::sqrt(agg.validation_error / info.validation_edges); context.cout() << "\t" << validation_error; } context.cout() << std::endl; } }; // end of error aggregator /** * \brief The prediction saver is used by the graph.save routine to * output the final predictions back to the filesystem. 
*/ struct prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { return ""; //nop } std::string save_edge(const edge_type& edge) const { if(edge.data().role == edge_data::PREDICT) { std::stringstream strm; const double prediction = edge.source().data().factor.dot(edge.target().data().factor); strm << edge.source().id() << '\t'; if(REMAP_TARGET) strm << (-edge.target().id() - SAFE_NEG_OFFSET) << '\t'; else strm << edge.target().id() << '\t'; strm << prediction << '\n'; return strm.str(); } else return ""; } }; // end of prediction_saver struct linear_model_saver_U { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() > 0){ std::string ret = boost::lexical_cast(vertex.id()) + " "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().factor[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; struct linear_model_saver_V { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() == 0){ std::string ret = boost::lexical_cast(-vertex.id()-SAFE_NEG_OFFSET) + ") "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().factor[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; /** * \brief The engine type used by the ALS matrix factorization * algorithm. * * The ALS matrix factorization algorithm currently uses the * synchronous engine. 
However we plan to add support for alternative * engines in the future. */ typedef graphlab::omni_engine engine_type; int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "Compute the Weighted-ALS factorization of a matrix."; graphlab::command_line_options clopts(description); std::string input_dir, output_dir; std::string predictions; size_t interval = 10; std::string exec_type = "synchronous"; clopts.attach_option("matrix", input_dir, "The directory containing the matrix file"); clopts.add_positional("matrix"); clopts.attach_option("D", vertex_data::NLATENT, "Number of latent parameters to use."); clopts.attach_option("max_iter", als_vertex_program::MAX_UPDATES, "The maxumum number of udpates allowed for a vertex"); clopts.attach_option("lambda", als_vertex_program::LAMBDA, "wALS regularization weight"); clopts.attach_option("tol", als_vertex_program::TOLERANCE, "residual termination threshold"); clopts.attach_option("maxval", als_vertex_program::MAXVAL, "max allowed value"); clopts.attach_option("minval", als_vertex_program::MINVAL, "min allowed value"); clopts.attach_option("interval", interval, "The time in seconds between error reports"); clopts.attach_option("predictions", predictions, "The prefix (folder and filename) to save predictions."); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); // clopts.attach_option("remap", REMAP_TARGET, // "Renumber target vertex ids (internally) so that they\n" // "are in a different range allowing user 0 to connect to movie 0"); clopts.attach_option("output", output_dir, "Output results"); parse_implicit_command_line(clopts); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." << std::endl; clopts.print_description(); return EXIT_FAILURE; } ///! 
Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); dc.cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; if (dc.procid() == 0) add_implicit_edges4(implicitratingtype, graph, dc); dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. Finished in " << timer.current_time() << std::endl; if (!graph.num_edges() || !graph.num_vertices()) logstream(LOG_FATAL)<< "Failed to load graph. Check your input path: " << input_dir << std::endl; dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; dc.cout() << "Creating engine" << std::endl; engine_type engine(dc, graph, exec_type, clopts); // Add error reporting to the engine const bool success = engine.add_edge_aggregator ("error", error_aggregator::map, error_aggregator::finalize) && engine.aggregate_periodic("error", interval); ASSERT_TRUE(success); // Signal all vertices on the vertices on the left (liberals) engine.map_reduce_vertices(als_vertex_program::signal_left); info = graph.map_reduce_edges(count_edges); dc.cout()<<"Training edges: 
" << info.training_edges << " validation edges: " << info.validation_edges << std::endl; // Run the WALS --------------------------------------------------------- dc.cout() << "Running Weighted-ALS" << std::endl; timer.start(); engine.start(); const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime << std::endl << "Updates executed: " << engine.num_updates() << std::endl << "Update Rate (updates/second): " << engine.num_updates() / runtime << std::endl; // Compute the final training error ----------------------------------------- dc.cout() << "Final error: " << std::endl; engine.aggregate_now("error"); // Make predictions --------------------------------------------------------- if(!predictions.empty()) { std::cout << "Saving predictions" << std::endl; const bool gzip_output = false; const bool save_vertices = false; const bool save_edges = true; const size_t threads_per_machine = 2; //save the predictions graph.save(predictions, prediction_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); //save the linear model graph.save(predictions + ".U", linear_model_saver_U(), gzip_output, save_edges, save_vertices, threads_per_machine); graph.save(predictions + ".V", linear_model_saver_V(), gzip_output, save_edges, save_vertices, threads_per_machine); } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/collaborative_filtering/warp_als_coord.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * \file * * Matrix factorization with the Alternative Least Squares (ALS) - parallel coordinate descent algorithm. * See the papers: * H.-F. Yu, C.-J. Hsieh, S. Si, I. S. Dhillon, Scalable Coordinate Descent Approaches to Parallel Matrix Factorization for Recommender Systems. IEEE International Conference on Data Mining(ICDM), December 2012. * Steffen Rendle, Zeno Gantner, Christoph Freudenthaler, and Lars Schmidt-Thieme. 2011. Fast context-aware recommendations with factorization machines. In Proceedings of the 34th international ACM SIGIR conference on Research and development in Information Retrieval (SIGIR '11). ACM, New York, NY, USA, 635-644. * Written by Danny Bickson, CMU */ #include #include #include #include #include #include "eigen_serialization.hpp" #include typedef Eigen::VectorXd vec_type; #define ALS_COORD_MAP_REDUCE 0 #define ALS_COORD_TRANSFORM 1 //when using negative node id range, we are not allowed to use //0 and 1 so we add 2. const static int SAFE_NEG_OFFSET=2; const static int regnormal = 0; static bool debug; int max_iter = 10; double maxval = 1e100; double minval = -1e100; std::string predictions; bool isuser(uint node){ return ((int)node) >= 0; } /** * \ingroup toolkit_matrix_factorization * * \brief the vertex data type which contains the latent pvec. * * Each row and each column in the matrix corresponds to a different * vertex in the SGD graph. Associated with each vertex is a pvec * (vector) of latent parameters that represent that vertex. 
The goal
 * of the SGD algorithm is to find the values for these latent
 * parameters such that the non-zero entries in the matrix can be
 * predicted by taking the dot product of the row and column pvecs.
 */
struct vertex_data {
  /**
   * \brief A shared "constant" that specifies the number of latent
   * values to use.
   */
  static size_t NLATENT;
  /** \brief The latent pvec for this vertex */
  vec_type pvec;
  // Snapshot of pvec taken before the current coordinate update;
  // consumed by the edge transform when refreshing cached residuals.
  vec_type prev;
  // Newly computed value for the current coordinate.
  float z;
  int t; //index inside the latent feature vector
  /**
   * \brief Simple default constructor which randomizes the vertex
   * data (or uses an all-ones pvec in debug mode).
   *
   * NOTE(review): the initializer list order (t, z) differs from the
   * declaration order (pvec, prev, z, t); members are still initialized
   * in declaration order, which is harmless here but triggers -Wreorder.
   */
  vertex_data() : t(0),z(0) { if (debug) pvec = vec_type::Ones(NLATENT); else randomize(); prev = vec_type::Zero(NLATENT); }
  /** \brief Randomizes the latent pvec */
  void randomize() { pvec.resize(NLATENT); pvec.setRandom(); }
  /** \brief Save the vertex data to a binary archive */
  void save(graphlab::oarchive& arc) const { arc << pvec << t << prev << z; }
  /** \brief Load the vertex data from a binary archive (mirrors save()) */
  void load(graphlab::iarchive& arc) { arc >> pvec >> t >> prev >> z; }
}; // end of vertex data

// Hash on the first latent coordinate, scaled to spread nearby values.
std::size_t hash_value(vertex_data const& b) { return (size_t)b.pvec[0]*1000; }

/**
 * \brief The edge data stores the entry in the matrix.
 *
 * In addition the edge data also stores the most recent error estimate.
 */
struct edge_data : public graphlab::IS_POD_TYPE {
  /**
   * \brief The type of data on the edge;
   *
   * \li *Train:* the observed value is correct and used in training
   * \li *Validate:* the observed value is correct but not used in training
   * \li *Predict:* The observed value is not correct and should not be
   * used in training.
*/ enum data_role_type { TRAIN, VALIDATE, PREDICT }; /** \brief the observed value for the edge */ float obs; /** \brief cached value for A_ij - prediction */ float R_ij; /** \brief The train/validation/test designation of the edge */ data_role_type role; /** \brief basic initialization */ edge_data(float obs = 0, data_role_type role = PREDICT) : obs(obs), role(role), R_ij(0) { } }; // end of edge data std::size_t hash_value(edge_data const& b) { return boost::hash_value(b.obs); } /** * \brief The graph type is defined in terms of the vertex and edge * data. */ typedef graphlab::distributed_graph graph_type; typedef graphlab::gl3engine engine_type; bool isuser_node(const graph_type::vertex_type& vertex){ return isuser(vertex.id()); } /** * \brief The graph loader function is a line parser used for * distributed graph construction. */ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { ASSERT_FALSE(line.empty()); // Determine the role of the data edge_data::data_role_type role = edge_data::TRAIN; if(boost::ends_with(filename,".validate")) role = edge_data::VALIDATE; else if(boost::ends_with(filename, ".predict")) role = edge_data::PREDICT; // Parse the line std::stringstream strm(line); graph_type::vertex_id_type source_id(-1), target_id(-1); float obs(0); strm >> source_id >> target_id; // for test files (.predict) no need to read the actual rating value. 
if(role == edge_data::TRAIN || role == edge_data::VALIDATE) { strm >> obs; } target_id = -(graphlab::vertex_id_type(target_id + SAFE_NEG_OFFSET)); // Create an edge and add it to the graph graph.add_edge(source_id, target_id, edge_data(obs, role)); return true; // successful load } // end of graph_loader double LAMBDA = 0.001; class gather_type { public: double numerator; double denominator; gather_type() { numerator = 0; denominator = 0; } gather_type(double numerator, double denominator) : numerator(numerator), denominator(denominator){ } /** \brief Save the values to a binary archive */ void save(graphlab::oarchive& arc) const { arc << numerator << denominator; } /** \brief Read the values from a binary archive */ void load(graphlab::iarchive& arc) { arc >> numerator >> denominator; } /** * sums up values */ gather_type& operator+=(const gather_type& other) { numerator += other.numerator; denominator += other.denominator; return *this; } // end of operator+= }; // end of gather type gather_type als_coord_map(const graph_type::vertex_type& center, graph_type::edge_type& edge, const graph_type::vertex_type& other) { if (center.data().t == 0){ double prediction = center.data().pvec.dot(other.data().pvec); prediction = std::min(prediction, maxval); prediction = std::max(prediction, minval); edge.data().R_ij = edge.data().obs - prediction; } //compute numerator of equation (6) in ICDM paper above // (A_ij - w_i^T*h_j + wit * h_jt )*h_jt gather_type ret((edge.data().R_ij + center.data().pvec[center.data().t] * other.data().pvec[center.data().t])*other.data().pvec[center.data().t], //compute denominator of equation (6) in ICDM paper above //h_jt^2 pow(other.data().pvec[center.data().t], 2)); return ret; } void als_coord_transform(const graph_type::vertex_type& center, graph_type::edge_type& edge, const graph_type::vertex_type& other) { //update using equation (7) in ICDM paper //R_ij -= (z - w_it )*h_jt edge.data().R_ij -= (center.data().z - 
center.data().prev[center.data().t])*other.data().pvec[center.data().t]; } //sum up two numerators and denomenators void als_coord_combine(gather_type& v1, const gather_type& v2) { v1 += v2; } //the main update function void als_coord_function(engine_type::context_type& context, graph_type::vertex_type& vertex) { double regularization = LAMBDA; for (vertex.data().t=0; vertex.data().t< (int)vertex_data::NLATENT; vertex.data().t++){ gather_type frac = context.map_reduce(ALS_COORD_MAP_REDUCE, graphlab::ALL_EDGES); assert(frac.denominator > 0); vertex.data().z = (frac.numerator/(frac.denominator+regularization)); vertex.data().prev = vertex.data().pvec; //update using equation (8) in ICDM paper //w_it = z; vertex.data().pvec[vertex.data().t] = vertex.data().z; //update the cached R_ij using equation (7) in ICDM paper context.edge_transform(ALS_COORD_TRANSFORM, graphlab::ALL_EDGES); } } /** * \brief Given an edge compute the error associated with that edge */ double extract_l2_error(const graph_type::edge_type & edge) { double pred = edge.source().data().pvec.dot(edge.target().data().pvec); double rmse = (edge.data().obs - pred) * (edge.data().obs - pred); return rmse; } // end of extract_l2_error size_t vertex_data::NLATENT = 20; /** * \brief The prediction saver is used by the graph.save routine to * output the final predictions back to the filesystem. 
*/ struct prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { return ""; //nop } std::string save_edge(const edge_type& edge) const { if(edge.data().role == edge_data::PREDICT) { std::stringstream strm; double prediction = edge.source().data().pvec.dot(edge.target().data().pvec); prediction = std::min(prediction, maxval); prediction = std::max(prediction, minval); strm << edge.source().id() << '\t'; strm << (-edge.target().id() - SAFE_NEG_OFFSET) << '\t'; strm << prediction << '\n'; return strm.str(); } else return ""; } }; // end of prediction_saver struct linear_model_saver_U { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() > 0){ std::string ret = boost::lexical_cast(vertex.id()) + " "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().pvec[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; struct linear_model_saver_V { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; /* save the linear model, using the format: nodeid) factor1 factor2 ... 
factorNLATENT \n */ std::string save_vertex(const vertex_type& vertex) const { if (vertex.num_out_edges() == 0){ std::string ret = boost::lexical_cast(-vertex.id()-SAFE_NEG_OFFSET) + ") "; for (uint i=0; i< vertex_data::NLATENT; i++) ret += boost::lexical_cast(vertex.data().pvec[i]) + " "; ret += "\n"; return ret; } else return ""; } std::string save_edge(const edge_type& edge) const { return ""; } }; int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "Compute the ALS factorization of a matrix."; graphlab::command_line_options clopts(description); std::string input_dir; std::string exec_type = "synchronous"; clopts.attach_option("matrix", input_dir, "The directory containing the matrix file"); clopts.add_positional("matrix"); clopts.attach_option("D", vertex_data::NLATENT, "Number of latent parameters to use."); clopts.attach_option("maxval", maxval, "max allowed value"); clopts.attach_option("minval", minval, "min allowed value"); clopts.attach_option("predictions", predictions, "The prefix (folder and filename) to save predictions."); clopts.attach_option("lambda", LAMBDA, "regularization weight"); clopts.attach_option("max_iter", max_iter, "number of iterations"); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." << std::endl; clopts.print_description(); return EXIT_FAILURE; } ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); dc.cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. 
Finished in " << timer.current_time() << std::endl; if (!graph.num_edges() || !graph.num_vertices()) logstream(LOG_FATAL)<< "Failed to load graph. Check your input path: " << input_dir << std::endl; dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; dc.cout() << "Creating engine" << std::endl; engine_type engine(dc, graph, clopts); engine.register_map_reduce(ALS_COORD_MAP_REDUCE, als_coord_map, als_coord_combine); engine.register_edge_transform(ALS_COORD_TRANSFORM, als_coord_transform); for (int i=0; i< max_iter; i++){ engine.parfor_all_local_vertices(als_coord_function); engine.wait(); double rmse = graph.map_reduce_edges(extract_l2_error); dc.cout() << "RMSE = " << sqrt(rmse / graph.num_edges()) << std::endl; } const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime; // Compute the final training error ----------------------------------------- dc.cout() << "Final error: " << std::endl; // Make predictions --------------------------------------------------------- if(!predictions.empty()) { std::cout << "Saving predictions" << std::endl; const bool gzip_output = false; const bool save_vertices = false; const bool save_edges = 
true; const size_t threads_per_machine = 2; //save the predictions graph.save(predictions, prediction_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); //save the linear model graph.save(predictions + ".U", linear_model_saver_U(), gzip_output, save_edges, save_vertices, threads_per_machine); graph.save(predictions + ".V", linear_model_saver_V(), gzip_output, save_edges, save_vertices, threads_per_machine); } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/collaborative_filtering/warp_nmf.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * \file * * \brief The main file for the NMF matrix factorization algorithm. * */ #include #include #include #include #include "eigen_serialization.hpp" #include #define VERTEX_DELTA_TASK_ID 0 typedef Eigen::VectorXd vec_type; typedef Eigen::MatrixXd mat_type; //when using negative node id range, we are not allowed to use //0 and 1 so we add 2. const static int SAFE_NEG_OFFSET=2; static bool debug; int iter = 0; enum { PHASE1, PHASE2}; int phase = PHASE1; double epsilon = 1e-16; bool isuser(uint node){ return ((int)node) >= 0; } /** * \ingroup toolkit_matrix_pvecization * * \brief the vertex data type which contains the latent pvec. 
* * Each row and each column in the matrix corresponds to a different * vertex in the SGD graph. Associated with each vertex is a pvec * (vector) of latent parameters that represent that vertex. The goal * of the SGD algorithm is to find the values for these latent * parameters such that the non-zero entries in the matrix can be * predicted by taking the dot product of the row and column pvecs. */ struct vertex_data { /** * \brief A shared "constant" that specifies the number of latent * values to use. */ static size_t NLATENT; /** \brief The latent pvec for this vertex */ vec_type pvec; int nupdates; /** * \brief Simple default constructor which randomizes the vertex * data */ vertex_data() { if (debug) pvec = vec_type::Ones(NLATENT); else randomize(); } /** \brief Randomizes the latent pvec */ void randomize() { pvec.resize(NLATENT); pvec.setRandom(); } /** \brief Save the vertex data to a binary archive */ void save(graphlab::oarchive& arc) const { arc << pvec; } /** \brief Load the vertex data from a binary archive */ void load(graphlab::iarchive& arc) { arc >> pvec; } }; // end of vertex data std::size_t hash_value(vertex_data const& b) { return b.nupdates; } /** * \brief The edge data stores the entry in the matrix. * * In addition the edge data sgdo stores the most recent error estimate. */ struct edge_data : public graphlab::IS_POD_TYPE { /** * \brief The type of data on the edge; * * \li *Train:* the observed value is correct and used in training * \li *Validate:* the observed value is correct but not used in training * \li *Predict:* The observed value is not correct and should not be * used in training. 
*/ enum data_role_type { TRAIN, VALIDATE, PREDICT }; /** \brief the observed value for the edge */ float obs; /** \brief The train/validation/test designation of the edge */ data_role_type role; /** \brief basic initialization */ edge_data(float obs = 0, data_role_type role = PREDICT) : obs(obs), role(role) { } }; // end of edge data std::size_t hash_value(edge_data const& b) { return boost::hash_value(b.obs); } /** * \brief The graph type is defined in terms of the vertex and edge * data. */ typedef graphlab::distributed_graph graph_type; typedef graphlab::gl3engine engine_type; vec_type x1; vec_type x2; vec_type * px; bool isuser_node(const graph_type::vertex_type& vertex){ return isuser(vertex.id()); } /** * \brief The graph loader function is a line parser used for * distributed graph construction. */ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { ASSERT_FALSE(line.empty()); // Determine the role of the data edge_data::data_role_type role = edge_data::TRAIN; if(boost::ends_with(filename,".validate")) role = edge_data::VALIDATE; else if(boost::ends_with(filename, ".predict")) role = edge_data::PREDICT; // Parse the line std::stringstream strm(line); graph_type::vertex_id_type source_id(-1), target_id(-1); float obs(0); strm >> source_id >> target_id; // for test files (.predict) no need to read the actual rating value. 
if(role == edge_data::TRAIN || role == edge_data::VALIDATE) { strm >> obs; } target_id = -(graphlab::vertex_id_type(target_id + SAFE_NEG_OFFSET)); // Create an edge and add it to the graph graph.add_edge(source_id, target_id, edge_data(obs, role)); return true; // successful load } // end of graph_loader void vertex_delta(graph_type::vertex_type& vtx, const vec_type& delta) { if (delta.sum() != 0) vtx.data().pvec.array() *= delta.array() / px->array(); for (uint i=0; i< vertex_data::NLATENT; i++) if (vtx.data().pvec[i] < epsilon) vtx.data().pvec[i] = epsilon; } void nmf_function(engine_type::context_type& context, graph_type::edge_type& edge) { double pred = edge.source().data().pvec.dot(edge.target().data().pvec); if (pred == 0) logstream(LOG_FATAL)<<"Got into numerical error!" << std::endl; vec_type delta; delta = (phase == PHASE1 ? edge.target().data().pvec : edge.source().data().pvec) * edge.data().obs / pred; context.send_delta(VERTEX_DELTA_TASK_ID, phase == PHASE1 ? edge.source() : edge.target(), delta); } vec_type count_edges(const graph_type::edge_type& edge) { vec_type ret = vec_type::Zero(2); if (edge.data().role == edge_data::TRAIN){ ret[0] = 1; } else if (edge.data().role == edge_data::VALIDATE){ ret[1] = 1; } if (edge.data().obs < 0) logstream(LOG_FATAL)<<"Found a negative entry in matirx row " << edge.source().id() << " with value: " << edge.data().obs << std::endl; return ret; } void verify_rows( graph_type::vertex_type& vertex){ if (isuser(vertex.id()) && vertex.num_out_edges() == 0) logstream(LOG_FATAL)<<"NMF algorithm can not work when the row " << vertex.id() << " of the matrix contains all zeros" << std::endl; } vec_type pre_iter( const graph_type::vertex_type & vertex){ return vertex.data().pvec; } void sync_function(engine_type::context_type& context, graph_type::vertex_type& vertex) { context.synchronize(vertex); } /** * \brief Given an edge compute the error associated with that edge */ double extract_l2_error(const graph_type::edge_type & 
edge) { double pred = edge.source().data().pvec.dot(edge.target().data().pvec); double rmse = (edge.data().obs - pred) * (edge.data().obs - pred); return rmse; } // end of extract_l2_error size_t vertex_data::NLATENT = 20; int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "Compute the ALS factorization of a matrix."; graphlab::command_line_options clopts(description); std::string input_dir; size_t interval = 0; size_t ITERATIONS = 10; std::string exec_type = "synchronous"; clopts.attach_option("matrix", input_dir, "The directory containing the matrix file"); clopts.add_positional("matrix"); clopts.attach_option("D", vertex_data::NLATENT, "Number of latent parameters to use."); clopts.attach_option("interval", interval, "The time in seconds between error reports"); clopts.attach_option("iterations", ITERATIONS, "number of NMF iterations"); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." << std::endl; clopts.print_description(); return EXIT_FAILURE; } ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); dc.cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. Finished in " << timer.current_time() << std::endl; if (!graph.num_edges() || !graph.num_vertices()) logstream(LOG_FATAL)<< "Failed to load graph. 
Check your input path: " << input_dir << std::endl; dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; dc.cout() << "Creating engine" << std::endl; engine_type engine(dc, graph, clopts); vec_type edge_count = graph.map_reduce_edges(count_edges); dc.cout()<<"Training edges: " << edge_count[0] << " validation edges: " << edge_count[1] << std::endl; graphlab::vertex_set left = graph.select(isuser_node); graphlab::vertex_set right = ~left; graph.transform_vertices(verify_rows, left); engine.register_vertex_delta(VERTEX_DELTA_TASK_ID, vertex_delta); dc.cout() << "Running NMF" << std::endl; timer.start(); for (size_t i = 0;i < ITERATIONS; ++i) { phase = PHASE1; x1 = graph.map_reduce_vertices(pre_iter,right); px = &x1; dc.cout() <<"x1 is: " << x1 << std::endl; engine.parfor_all_local_edges(nmf_function); //todo - only left engine.parfor_all_local_vertices(sync_function); //todo - only left engine.wait(); phase = PHASE2; x2 = graph.map_reduce_vertices(pre_iter,left); px = &x2; dc.cout() <<"x2 is: " << x2 << std::endl; engine.parfor_all_local_edges(nmf_function); //todo only right engine.parfor_all_local_vertices(sync_function); //todo only right engine.wait(); double rmse = graph.map_reduce_edges(extract_l2_error); dc.cout() << "RMSE = " << sqrt(rmse / 
graph.num_edges()) << std::endl; } const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime; // Compute the final training error ----------------------------------------- dc.cout() << "Final error: " << std::endl; graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/computer_vision/CMakeLists.txt ================================================ project(ComputerVision) add_graphlab_executable(grabcut grabcut.cpp) requires_opencv(grabcut) add_graphlab_executable(stitching stitching.cpp) requires_opencv(stitching) add_graphlab_executable(stitching_detailed stitching_detailed.cpp) requires_opencv(stitching_detailed) add_graphlab_executable(stitch stitch_main.cpp) requires_opencv(stitch) requires_eigen(stitch) add_graphlab_executable(stitch_full stitch_full_main.cpp) requires_opencv(stitch_full) requires_eigen(stitch_full) target_link_libraries( stitch_full ${OpenCV_LIBS} ) set (CMAKE_C_FLAGS "-g -Wall") set (CMAKE_CXX_FLAGS "-g -Wall") ================================================ FILE: toolkits/computer_vision/computer_vision.dox ================================================ /** \page computer_vision Computer Vision \brief GraphLab Computer Vision Toolkit aims to provide fully distributed wrappers to algorithms in OpenCV, an open-source library aimed at real-time computer vision. Eventually, GraphLab Computer Vision Toolkit will become it’s own spin-off project called CloudCV, a system that will provide access to state-of-the-art computer vision algorithms on the cloud. Currently, the only implemented algorithm is Image-Stitching, where the goal is to create a composite panoramic image from a collection of images. 
The goal in image stitching is to create a composite panoramic image from a collection of images.
- asynchronous: LoopyBP updates are run asynchronously with priorities (Residual BP). This engine has greater overhead and exposes less parallelism but can substantially improve the rate of convergence.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 *
 */

#include "eigen_serialization.hpp"

// Serialize an Eigen dense vector: element count first, then the raw
// scalar bytes copied in one contiguous blob via vec.data().
graphlab::oarchive& operator<<(graphlab::oarchive& arc, const Eigen::VectorXd& vec) {
  typedef Eigen::VectorXd::Index index_type;
  typedef Eigen::VectorXd::Scalar scalar_type;
  const index_type size = vec.size();
  arc << size;
  graphlab::serialize(arc, vec.data(), size * sizeof(scalar_type));
  return arc;
} // end of save vector

// Deserialize a vector written by the matching operator<< above: read
// the size, resize the vector, then fill its buffer in place.
// NOTE(review): assumes the writing and reading builds agree on
// sizeof(Index) and sizeof(Scalar) -- confirm for cross-platform use.
graphlab::iarchive& operator>>(graphlab::iarchive& arc, Eigen::VectorXd& vec) {
  typedef Eigen::VectorXd::Index index_type;
  typedef Eigen::VectorXd::Scalar scalar_type;
  index_type size = 0;
  arc >> size;
  vec.resize(size);
  graphlab::deserialize(arc, vec.data(), size * sizeof(scalar_type));
  return arc;
} // end of load vector

// Serialize a dense matrix: rows and cols first, then the raw scalar
// buffer from mat.data(). The byte layout follows Eigen's storage
// order (column-major by default), so writer and reader must agree.
graphlab::oarchive& operator<<(graphlab::oarchive& arc, const Eigen::MatrixXd& mat) {
  typedef Eigen::MatrixXd::Index index_type;
  typedef Eigen::MatrixXd::Scalar scalar_type;
  const index_type rows = mat.rows();
  const index_type cols = mat.cols();
  arc << rows << cols;
  graphlab::serialize(arc, mat.data(), rows*cols*sizeof(scalar_type));
  return arc;
} // end of save matrix

// Deserialize a matrix written by the matching operator<< above:
// read the dimensions, resize, then fill the buffer in place.
graphlab::iarchive& operator>>(graphlab::iarchive& arc, Eigen::MatrixXd& mat) {
  typedef Eigen::MatrixXd::Index index_type;
  typedef Eigen::MatrixXd::Scalar scalar_type;
  index_type rows=0, cols=0;
  arc >> rows >> cols;
  mat.resize(rows,cols);
  graphlab::deserialize(arc, mat.data(), rows*cols*sizeof(scalar_type));
  return arc;
} // end of load matrix

================================================
FILE: toolkits/computer_vision/eigen_serialization.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef EIGEN_SERIALIZATION_HPP #define EIGEN_SERIALIZATION_HPP #include #include BEGIN_OUT_OF_PLACE_SAVE(arc, Eigen::VectorXd, vec) { typedef Eigen::VectorXd::Index index_type; typedef Eigen::VectorXd::Scalar scalar_type; const index_type size = vec.size(); arc << size; graphlab::serialize(arc, vec.data(), size * sizeof(scalar_type)); } END_OUT_OF_PLACE_SAVE() BEGIN_OUT_OF_PLACE_LOAD(arc, Eigen::VectorXd, vec) { typedef Eigen::VectorXd::Index index_type; typedef Eigen::VectorXd::Scalar scalar_type; index_type size = 0; arc >> size; vec.resize(size); graphlab::deserialize(arc, vec.data(), size * sizeof(scalar_type)); } END_OUT_OF_PLACE_LOAD() BEGIN_OUT_OF_PLACE_SAVE(arc, Eigen::MatrixXd, mat) { typedef Eigen::MatrixXd::Index index_type; typedef Eigen::MatrixXd::Scalar scalar_type; const index_type rows = mat.rows(); const index_type cols = mat.cols(); arc << rows << cols; graphlab::serialize(arc, mat.data(), rows*cols*sizeof(scalar_type)); } END_OUT_OF_PLACE_SAVE() BEGIN_OUT_OF_PLACE_LOAD(arc, Eigen::MatrixXd, mat) { typedef Eigen::MatrixXd::Index index_type; typedef Eigen::MatrixXd::Scalar scalar_type; index_type rows=0, cols=0; arc >> rows >> cols; mat.resize(rows,cols); graphlab::deserialize(arc, mat.data(), rows*cols*sizeof(scalar_type)); } 
END_OUT_OF_PLACE_LOAD() // inline graphlab::oarchive& operator<<(graphlab::oarchive& arc, const Eigen::VectorXd& vec) { // typedef Eigen::VectorXd::Index index_type; // typedef Eigen::VectorXd::Scalar scalar_type; // const index_type size = vec.size(); // arc << size; // graphlab::serialize(arc, vec.data(), size * sizeof(scalar_type)); // return arc; // } // end of save vector // inline graphlab::iarchive& operator>>(graphlab::iarchive& arc, Eigen::VectorXd& vec) { // typedef Eigen::VectorXd::Index index_type; // typedef Eigen::VectorXd::Scalar scalar_type; // index_type size = 0; // arc >> size; // vec.resize(size); // graphlab::deserialize(arc, vec.data(), size * sizeof(scalar_type)); // return arc; // } // end of save vector // inline graphlab::oarchive& operator<<(graphlab::oarchive& arc, const Eigen::MatrixXd& mat) { // typedef Eigen::MatrixXd::Index index_type; // typedef Eigen::MatrixXd::Scalar scalar_type; // const index_type rows = mat.rows(); // const index_type cols = mat.cols(); // arc << rows << cols; // graphlab::serialize(arc, mat.data(), rows*cols*sizeof(scalar_type)); // return arc; // } // end of save matrix // inline graphlab::iarchive& operator>>(graphlab::iarchive& arc, Eigen::MatrixXd& mat) { // typedef Eigen::MatrixXd::Index index_type; // typedef Eigen::MatrixXd::Scalar scalar_type; // index_type rows=0, cols=0; // arc >> rows >> cols; // mat.resize(rows,cols); // graphlab::deserialize(arc, mat.data(), rows*cols*sizeof(scalar_type)); // return arc; // } // end of load matrix #endif ================================================ FILE: toolkits/computer_vision/gcgraph.hpp ================================================ /*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. 
// If you do not agree to this license, do not download, install, // copy or use the software. // // // Intel License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000, Intel Corporation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of Intel Corporation may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. 
// //M*/ #ifndef _CV_GCGRAPH_H_ #define _CV_GCGRAPH_H_ template class GCGraph { public: GCGraph(); GCGraph( unsigned int vtxCount, unsigned int edgeCount ); ~GCGraph(); void create( unsigned int vtxCount, unsigned int edgeCount ); int addVtx(); void addEdges( int i, int j, TWeight w, TWeight revw ); void addTermWeights( int i, TWeight sourceW, TWeight sinkW ); TWeight maxFlow(); bool inSourceSegment( int i ); private: class Vtx { public: Vtx *next; // initialized and used in maxFlow() only int parent; int first; int ts; int dist; TWeight weight; uchar t; }; class Edge { public: int dst; int next; TWeight weight; }; std::vector vtcs; std::vector edges; TWeight flow; }; template GCGraph::GCGraph() { flow = 0; } template GCGraph::GCGraph( unsigned int vtxCount, unsigned int edgeCount ) { create( vtxCount, edgeCount ); } template GCGraph::~GCGraph() { } template void GCGraph::create( unsigned int vtxCount, unsigned int edgeCount ) { vtcs.reserve( vtxCount ); edges.reserve( edgeCount + 2 ); flow = 0; } template int GCGraph::addVtx() { Vtx v; memset( &v, 0, sizeof(Vtx)); vtcs.push_back(v); return (int)vtcs.size() - 1; } template void GCGraph::addEdges( int i, int j, TWeight w, TWeight revw ) { CV_Assert( i>=0 && i<(int)vtcs.size() ); CV_Assert( j>=0 && j<(int)vtcs.size() ); CV_Assert( w>=0 && revw>=0 ); CV_Assert( i != j ); if( !edges.size() ) edges.resize( 2 ); Edge fromI, toI; fromI.dst = j; fromI.next = vtcs[i].first; fromI.weight = w; vtcs[i].first = (int)edges.size(); edges.push_back( fromI ); toI.dst = i; toI.next = vtcs[j].first; toI.weight = revw; vtcs[j].first = (int)edges.size(); edges.push_back( toI ); } template void GCGraph::addTermWeights( int i, TWeight sourceW, TWeight sinkW ) { CV_Assert( i>=0 && i<(int)vtcs.size() ); TWeight dw = vtcs[i].weight; if( dw > 0 ) sourceW += dw; else sinkW -= dw; flow += (sourceW < sinkW) ? 
sourceW : sinkW; vtcs[i].weight = sourceW - sinkW; } template TWeight GCGraph::maxFlow() { const int TERMINAL = -1, ORPHAN = -2; Vtx stub, *nilNode = &stub, *first = nilNode, *last = nilNode; int curr_ts = 0; stub.next = nilNode; Vtx *vtxPtr = &vtcs[0]; Edge *edgePtr = &edges[0]; std::vector orphans; // initialize the active queue and the graph vertices for( int i = 0; i < (int)vtcs.size(); i++ ) { Vtx* v = vtxPtr + i; v->ts = 0; if( v->weight != 0 ) { last = last->next = v; v->dist = 1; v->parent = TERMINAL; v->t = v->weight < 0; } else v->parent = 0; } first = first->next; last->next = nilNode; nilNode->next = 0; // run the search-path -> augment-graph -> restore-trees loop for(;;) { Vtx* v, *u; int e0 = -1, ei = 0, ej = 0; TWeight minWeight, weight; uchar vt; // grow S & T search trees, find an edge connecting them while( first != nilNode ) { v = first; if( v->parent ) { vt = v->t; for( ei = v->first; ei != 0; ei = edgePtr[ei].next ) { if( edgePtr[ei^vt].weight == 0 ) continue; u = vtxPtr+edgePtr[ei].dst; if( !u->parent ) { u->t = vt; u->parent = ei ^ 1; u->ts = v->ts; u->dist = v->dist + 1; if( !u->next ) { u->next = nilNode; last = last->next = u; } continue; } if( u->t != vt ) { e0 = ei ^ vt; break; } if( u->dist > v->dist+1 && u->ts <= v->ts ) { // reassign the parent u->parent = ei ^ 1; u->ts = v->ts; u->dist = v->dist + 1; } } if( e0 > 0 ) break; } // exclude the vertex from the active list first = first->next; v->next = 0; } if( e0 <= 0 ) break; // find the minimum edge weight along the path minWeight = edgePtr[e0].weight; assert( minWeight > 0 ); // k = 1: source tree, k = 0: destination tree for( int k = 1; k >= 0; k-- ) { for( v = vtxPtr+edgePtr[e0^k].dst;; v = vtxPtr+edgePtr[ei].dst ) { if( (ei = v->parent) < 0 ) break; weight = edgePtr[ei^k].weight; minWeight = MIN(minWeight, weight); assert( minWeight > 0 ); } weight = fabs(v->weight); minWeight = MIN(minWeight, weight); assert( minWeight > 0 ); } // modify weights of the edges along the path and 
collect orphans edgePtr[e0].weight -= minWeight; edgePtr[e0^1].weight += minWeight; flow += minWeight; // k = 1: source tree, k = 0: destination tree for( int k = 1; k >= 0; k-- ) { for( v = vtxPtr+edgePtr[e0^k].dst;; v = vtxPtr+edgePtr[ei].dst ) { if( (ei = v->parent) < 0 ) break; edgePtr[ei^(k^1)].weight += minWeight; if( (edgePtr[ei^k].weight -= minWeight) == 0 ) { orphans.push_back(v); v->parent = ORPHAN; } } v->weight = v->weight + minWeight*(1-k*2); if( v->weight == 0 ) { orphans.push_back(v); v->parent = ORPHAN; } } // restore the search trees by finding new parents for the orphans curr_ts++; while( !orphans.empty() ) { Vtx* v = orphans.back(); orphans.pop_back(); int d, minDist = INT_MAX; e0 = 0; vt = v->t; for( ei = v->first; ei != 0; ei = edgePtr[ei].next ) { if( edgePtr[ei^(vt^1)].weight == 0 ) continue; u = vtxPtr+edgePtr[ei].dst; if( u->t != vt || u->parent == 0 ) continue; // compute the distance to the tree root for( d = 0;; ) { if( u->ts == curr_ts ) { d += u->dist; break; } ej = u->parent; d++; if( ej < 0 ) { if( ej == ORPHAN ) d = INT_MAX-1; else { u->ts = curr_ts; u->dist = 1; } break; } u = vtxPtr+edgePtr[ej].dst; } // update the distance if( ++d < INT_MAX ) { if( d < minDist ) { minDist = d; e0 = ei; } for( u = vtxPtr+edgePtr[ei].dst; u->ts != curr_ts; u = vtxPtr+edgePtr[u->parent].dst ) { u->ts = curr_ts; u->dist = --d; } } } if( (v->parent = e0) > 0 ) { v->ts = curr_ts; v->dist = minDist; continue; } /* no parent is found */ v->ts = 0; for( ei = v->first; ei != 0; ei = edgePtr[ei].next ) { u = vtxPtr+edgePtr[ei].dst; ej = u->parent; if( u->t != vt || !ej ) continue; if( edgePtr[ei^(vt^1)].weight && !u->next ) { u->next = nilNode; last = last->next = u; } if( ej > 0 && vtxPtr+edgePtr[ej].dst == v ) { orphans.push_back(u); u->parent = ORPHAN; } } } } return flow; } template bool GCGraph::inSourceSegment( int i ) { CV_Assert( i>=0 && i<(int)vtcs.size() ); return vtcs[i].t == 0; }; #endif ================================================ FILE: 
toolkits/computer_vision/grabcut.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" #include using namespace std; using namespace cv; void help() { cout << "\nThis program demonstrates GrabCut segmentation -- select an object in a region\n" "and then grabcut will attempt to segment it out.\n" "Call:\n" "./grabcut \n" "\nSelect a rectangular area around the object you want to segment\n" << "\nHot keys: \n" "\tESC - quit the program\n" "\tr - restore the original image\n" "\tn - next iteration\n" "\n" "\tleft mouse button - set rectangle\n" "\n" "\tCTRL+left mouse button - set GC_BGD pixels\n" "\tSHIFT+left mouse button - set CG_FGD pixels\n" "\n" "\tCTRL+right mouse button - set GC_PR_BGD pixels\n" "\tSHIFT+right mouse button - set CG_PR_FGD pixels\n" << endl; } const Scalar RED = Scalar(0,0,255); const Scalar PINK = Scalar(230,130,255); const Scalar BLUE = Scalar(255,0,0); const Scalar LIGHTBLUE = Scalar(255,255,160); const Scalar GREEN = Scalar(0,255,0); const int BGD_KEY = CV_EVENT_FLAG_CTRLKEY; const int FGD_KEY = CV_EVENT_FLAG_SHIFTKEY; void getBinMask( const Mat& comMask, Mat& binMask ) { if( comMask.empty() || comMask.type()!=CV_8UC1 ) CV_Error( CV_StsBadArg, "comMask is empty or has incorrect type (not CV_8UC1)" ); if( binMask.empty() || binMask.rows!=comMask.rows || 
binMask.cols!=comMask.cols ) binMask.create( comMask.size(), CV_8UC1 ); binMask = comMask & 1; } class GCApplication { public: enum{ NOT_SET = 0, IN_PROCESS = 1, SET = 2 }; static const int radius = 2; static const int thickness = -1; void reset(); void setImageAndWinName( const Mat& _image, const string& _winName ); void showImage() const; void mouseClick( int event, int x, int y, int flags, void* param ); int nextIter(); int getIterCount() const { return iterCount; } private: void setRectInMask(); void setLblsInMask( int flags, Point p, bool isPr ); const string* winName; const Mat* image; Mat mask; Mat bgdModel, fgdModel; uchar rectState, lblsState, prLblsState; bool isInitialized; Rect rect; vector fgdPxls, bgdPxls, prFgdPxls, prBgdPxls; int iterCount; }; void GCApplication::reset() { if( !mask.empty() ) mask.setTo(Scalar::all(GC_BGD)); bgdPxls.clear(); fgdPxls.clear(); prBgdPxls.clear(); prFgdPxls.clear(); isInitialized = false; rectState = NOT_SET; lblsState = NOT_SET; prLblsState = NOT_SET; iterCount = 0; } void GCApplication::setImageAndWinName( const Mat& _image, const string& _winName ) { if( _image.empty() || _winName.empty() ) return; image = &_image; winName = &_winName; mask.create( image->size(), CV_8UC1); reset(); } void GCApplication::showImage() const { if( image->empty() || winName->empty() ) return; Mat res; Mat binMask; if( !isInitialized ) image->copyTo( res ); else { getBinMask( mask, binMask ); image->copyTo( res, binMask ); } vector::const_iterator it; for( it = bgdPxls.begin(); it != bgdPxls.end(); ++it ) circle( res, *it, radius, BLUE, thickness ); for( it = fgdPxls.begin(); it != fgdPxls.end(); ++it ) circle( res, *it, radius, RED, thickness ); for( it = prBgdPxls.begin(); it != prBgdPxls.end(); ++it ) circle( res, *it, radius, LIGHTBLUE, thickness ); for( it = prFgdPxls.begin(); it != prFgdPxls.end(); ++it ) circle( res, *it, radius, PINK, thickness ); if( rectState == IN_PROCESS || rectState == SET ) rectangle( res, Point( rect.x, 
rect.y ), Point(rect.x + rect.width, rect.y + rect.height ), GREEN, 2); imshow( *winName, res ); } void GCApplication::setRectInMask() { assert( !mask.empty() ); mask.setTo( GC_BGD ); rect.x = max(0, rect.x); rect.y = max(0, rect.y); rect.width = min(rect.width, image->cols-rect.x); rect.height = min(rect.height, image->rows-rect.y); (mask(rect)).setTo( Scalar(GC_PR_FGD) ); } void GCApplication::setLblsInMask( int flags, Point p, bool isPr ) { vector *bpxls, *fpxls; uchar bvalue, fvalue; if( !isPr ) { bpxls = &bgdPxls; fpxls = &fgdPxls; bvalue = GC_BGD; fvalue = GC_FGD; } else { bpxls = &prBgdPxls; fpxls = &prFgdPxls; bvalue = GC_PR_BGD; fvalue = GC_PR_FGD; } if( flags & BGD_KEY ) { bpxls->push_back(p); circle( mask, p, radius, bvalue, thickness ); } if( flags & FGD_KEY ) { fpxls->push_back(p); circle( mask, p, radius, fvalue, thickness ); } } void GCApplication::mouseClick( int event, int x, int y, int flags, void* ) { // TODO add bad args check switch( event ) { case CV_EVENT_LBUTTONDOWN: // set rect or GC_BGD(GC_FGD) labels { bool isb = (flags & BGD_KEY) != 0, isf = (flags & FGD_KEY) != 0; if( rectState == NOT_SET && !isb && !isf ) { rectState = IN_PROCESS; rect = Rect( x, y, 1, 1 ); } if ( (isb || isf) && rectState == SET ) lblsState = IN_PROCESS; } break; case CV_EVENT_RBUTTONDOWN: // set GC_PR_BGD(GC_PR_FGD) labels { bool isb = (flags & BGD_KEY) != 0, isf = (flags & FGD_KEY) != 0; if ( (isb || isf) && rectState == SET ) prLblsState = IN_PROCESS; } break; case CV_EVENT_LBUTTONUP: if( rectState == IN_PROCESS ) { rect = Rect( Point(rect.x, rect.y), Point(x,y) ); rectState = SET; setRectInMask(); assert( bgdPxls.empty() && fgdPxls.empty() && prBgdPxls.empty() && prFgdPxls.empty() ); showImage(); } if( lblsState == IN_PROCESS ) { setLblsInMask(flags, Point(x,y), false); lblsState = SET; showImage(); } break; case CV_EVENT_RBUTTONUP: if( prLblsState == IN_PROCESS ) { setLblsInMask(flags, Point(x,y), true); prLblsState = SET; showImage(); } break; case 
CV_EVENT_MOUSEMOVE: if( rectState == IN_PROCESS ) { rect = Rect( Point(rect.x, rect.y), Point(x,y) ); assert( bgdPxls.empty() && fgdPxls.empty() && prBgdPxls.empty() && prFgdPxls.empty() ); showImage(); } else if( lblsState == IN_PROCESS ) { setLblsInMask(flags, Point(x,y), false); showImage(); } else if( prLblsState == IN_PROCESS ) { setLblsInMask(flags, Point(x,y), true); showImage(); } break; } } int GCApplication::nextIter() { if( isInitialized ) grabCut( *image, mask, rect, bgdModel, fgdModel, 1 ); else { if( rectState != SET ) return iterCount; if( lblsState == SET || prLblsState == SET ) grabCut( *image, mask, rect, bgdModel, fgdModel, 1, GC_INIT_WITH_MASK ); else grabCut( *image, mask, rect, bgdModel, fgdModel, 1, GC_INIT_WITH_RECT ); isInitialized = true; } iterCount++; bgdPxls.clear(); fgdPxls.clear(); prBgdPxls.clear(); prFgdPxls.clear(); return iterCount; } GCApplication gcapp; void on_mouse( int event, int x, int y, int flags, void* param ) { gcapp.mouseClick( event, x, y, flags, param ); } int main( int argc, char** argv ) { if( argc!=2 ) { help(); return 1; } string filename = argv[1]; if( filename.empty() ) { cout << "\nDurn, couldn't read in " << argv[1] << endl; return 1; } Mat image = imread( filename, 1 ); if( image.empty() ) { cout << "\n Durn, couldn't read image filename " << filename << endl; return 1; } help(); const string winName = "image"; cvNamedWindow( winName.c_str(), CV_WINDOW_AUTOSIZE ); cvSetMouseCallback( winName.c_str(), on_mouse, 0 ); gcapp.setImageAndWinName( image, winName ); gcapp.showImage(); for(;;) { int c = cvWaitKey(0); switch( (char) c ) { case '\x1b': cout << "Exiting ..." << endl; goto exit_main; case 'r': cout << endl; gcapp.reset(); gcapp.showImage(); break; case 'n': int iterCount = gcapp.getIterCount(); cout << "<" << iterCount << "... 
"; int newIterCount = gcapp.nextIter(); if( newIterCount > iterCount ) { gcapp.showImage(); cout << iterCount << ">" << endl; } else cout << "rect must be determined>" << endl; break; } } exit_main: cvDestroyWindow( winName.c_str() ); return 0; } ================================================ FILE: toolkits/computer_vision/opencv_serialization.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #include "opencv_serialization.hpp" ////////////////////////////////////////////////// // For Size graphlab::oarchive& operator<<(graphlab::oarchive& arc, const cv::Size& img_size) { arc << img_size.width << img_size.height; return arc; } graphlab::iarchive& operator>>(graphlab::iarchive& arc, cv::Size& img_size) { arc >> img_size.width >> img_size.height; return arc; } ////////////////////////////////////////////////// // For Point2f graphlab::oarchive& operator<<(graphlab::oarchive& arc, const cv::Point2f& pt) { arc << pt.x << pt.y; return arc; } graphlab::iarchive& operator>>(graphlab::iarchive& arc, cv::Point2f& pt) { arc >> pt.x >> pt.y; return arc; } ////////////////////////////////////////////////// // For KeyPoint graphlab::oarchive& operator<<(graphlab::oarchive& arc, const cv::KeyPoint& keypoint) { arc << keypoint.pt << keypoint.size << keypoint.angle << keypoint.response << keypoint.octave << keypoint.class_id; return arc; } graphlab::iarchive& operator>>(graphlab::iarchive& arc, cv::KeyPoint& keypoint) { arc >> keypoint.pt >> keypoint.size >> keypoint.angle >> keypoint.response >> keypoint.octave >> keypoint.class_id; return arc; } ////////////////////////////////////////////////// // For Mat graphlab::oarchive& operator<<(graphlab::oarchive& arc, const cv::Mat& mat) { size_t elem_size = mat.elemSize(); size_t elem_type = mat.type(); arc << mat.cols << mat.rows << elem_size << elem_type; const size_t data_size = mat.cols * mat.rows * elem_size; graphlab::serialize(arc, mat.ptr(), data_size); return arc; } graphlab::iarchive& operator>>(graphlab::oarchive& arc, const cv::Mat& mat) { int cols, rows; size_t elem_size, elem_type; arc >> cols >> rows >> elem_size >> elem_type; mat.create(rows, cols, elem_type); size_t data_size = mat.cols * mat.rows * elem_size; graphlab::deserialize(arc, mat.ptr(), data_size); return arc; } ////////////////////////////////////////////////// // For ImageFeatures graphlab::oarchive& operator<<(graphlab::oarchive& arc, 
const cv::detail::ImageFeatures& features) { arc << features.img_idx << features.img_size << features.keypoints << features.descriptors; return arc; } graphlab::iarchive& operator>>(graphlab::oarchive& arc, const cv::detail::ImageFeatures& features) { arc >> features.img_idx >> features.img_size >> features.keypoints >> features.descriptors; return arc; } ////////////////////////////////////////////////// // For DMatch graphlab::oarchive& operator<<(graphlab::oarchive& arc, const cv::DMatch& match) { arc << match.queryIdx << match.trainIdx << match.imgIdx << match.distance; return arc; } graphlab::iarchive& operator>>(graphlab::oarchive& arc, const cv::DMatch& match) { arc >> match.queryIdx >> match.trainIdx >> match.imgIdx >> match.distance; return arc; } ////////////////////////////////////////////////// // For MatchesInfo graphlab::oarchive& operator<<(graphlab::oarchive& arc, const cv::detail::MatchesInfo& matchesinfo) { arc << matchesinfo.src_img_idx << matchesinfo.dst_img_idx << matchesinfo.matches << matchesinfo.inliers_mask << matchesinfo.num_inliers << matchesinfo.H << matchesinfo.confidence; return arc; } graphlab::iarchive& operator>>(graphlab::oarchive& arc, const cv::detail::MatchesInfo& matchesinfo) { arc >> matchesinfo.src_img_idx >> matchesinfo.dst_img_idx >> matchesinfo.matches >> matchesinfo.inliers_mask >> matchesinfo.num_inliers >> matchesinfo.H >> matchesinfo.confidence; return arc; } ////////////////////////////////////////////////// // For CameraParams graphlab::oarchive& operator<<(graphlab::oarchive& arc, const cv::detail::CameraParams& camera) { arc << camera.focal << camera.aspect << camera.ppx << camera.ppy << camera.R << camera.t; return arc; } graphlab::iarchive& operator>>(graphlab::oarchive& arc, const cv::detail::CameraParams& camera) { arc >> camera.focal >> camera.aspect >> camera.ppx >> camera.ppy >> camera.R >> camera.t; return arc; } ================================================ FILE: 
toolkits/computer_vision/opencv_serialization.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef OPENCV_SERIALIZATION_HPP #define OPENCV_SERIALIZATION_HPP #include #include "opencv2/opencv_modules.hpp" #include "opencv2/opencv.hpp" #include "opencv2/stitching/stitcher.hpp" ////////////////////////////////////////////////// // For Size BEGIN_OUT_OF_PLACE_SAVE(arc, cv::Size, img_size) { arc << img_size.width << img_size.height; } END_OUT_OF_PLACE_SAVE() BEGIN_OUT_OF_PLACE_LOAD(arc, cv::Size, img_size) { arc >> img_size.width >> img_size.height; } END_OUT_OF_PLACE_LOAD() ////////////////////////////////////////////////// // For Point2f BEGIN_OUT_OF_PLACE_SAVE(arc, cv::Point2f, pt) { arc << pt.x << pt.y; } END_OUT_OF_PLACE_SAVE() BEGIN_OUT_OF_PLACE_LOAD(arc, cv::Point2f, pt) { arc >> pt.x >> pt.y; } END_OUT_OF_PLACE_LOAD() ////////////////////////////////////////////////// // For KeyPoint BEGIN_OUT_OF_PLACE_SAVE(arc, cv::KeyPoint, keypoint) { arc << keypoint.pt << keypoint.size << keypoint.angle << keypoint.response << keypoint.octave << keypoint.class_id; } END_OUT_OF_PLACE_SAVE() BEGIN_OUT_OF_PLACE_LOAD(arc, cv::KeyPoint, keypoint) { arc >> keypoint.pt >> keypoint.size >> keypoint.angle >> keypoint.response >> keypoint.octave >> keypoint.class_id; } END_OUT_OF_PLACE_LOAD() ////////////////////////////////////////////////// // For Mat 
BEGIN_OUT_OF_PLACE_SAVE(arc, cv::Mat, mat) { size_t elem_size = mat.elemSize(); size_t elem_type = mat.type(); arc << mat.cols << mat.rows << elem_size << elem_type; const size_t data_size = mat.cols * mat.rows * elem_size; graphlab::serialize(arc, mat.ptr(), data_size); } END_OUT_OF_PLACE_SAVE() BEGIN_OUT_OF_PLACE_LOAD(arc, cv::Mat, mat) { int cols, rows; size_t elem_size, elem_type; arc >> cols >> rows >> elem_size >> elem_type; mat.create(rows, cols, elem_type); size_t data_size = mat.cols * mat.rows * elem_size; graphlab::deserialize(arc, mat.ptr(), data_size); } END_OUT_OF_PLACE_LOAD() ////////////////////////////////////////////////// // For ImageFeatures BEGIN_OUT_OF_PLACE_SAVE(arc, cv::detail::ImageFeatures, features) { arc << features.img_idx << features.img_size << features.keypoints << features.descriptors; } END_OUT_OF_PLACE_SAVE() BEGIN_OUT_OF_PLACE_LOAD(arc, cv::detail::ImageFeatures, features) { arc >> features.img_idx >> features.img_size >> features.keypoints >> features.descriptors; } END_OUT_OF_PLACE_LOAD() ////////////////////////////////////////////////// // For DMatch BEGIN_OUT_OF_PLACE_SAVE(arc, cv::DMatch, match) { arc << match.queryIdx << match.trainIdx << match.imgIdx << match.distance; } END_OUT_OF_PLACE_SAVE() BEGIN_OUT_OF_PLACE_LOAD(arc, cv::DMatch, match) { arc >> match.queryIdx >> match.trainIdx >> match.imgIdx >> match.distance; } END_OUT_OF_PLACE_LOAD() ////////////////////////////////////////////////// // For MatchesInfo BEGIN_OUT_OF_PLACE_SAVE(arc, cv::detail::MatchesInfo, matchesinfo) { arc << matchesinfo.src_img_idx << matchesinfo.dst_img_idx << matchesinfo.matches << matchesinfo.inliers_mask << matchesinfo.num_inliers << matchesinfo.H << matchesinfo.confidence; } END_OUT_OF_PLACE_SAVE() BEGIN_OUT_OF_PLACE_LOAD(arc, cv::detail::MatchesInfo, matchesinfo) { arc >> matchesinfo.src_img_idx >> matchesinfo.dst_img_idx >> matchesinfo.matches >> matchesinfo.inliers_mask >> matchesinfo.num_inliers >> matchesinfo.H >> 
matchesinfo.confidence; } END_OUT_OF_PLACE_LOAD() ////////////////////////////////////////////////// // For CameraParams BEGIN_OUT_OF_PLACE_SAVE(arc, cv::detail::CameraParams, camera) { arc << camera.focal << camera.aspect << camera.ppx << camera.ppy << camera.R << camera.t; } END_OUT_OF_PLACE_SAVE() BEGIN_OUT_OF_PLACE_LOAD(arc, cv::detail::CameraParams, camera) { arc >> camera.focal >> camera.aspect >> camera.ppx >> camera.ppy >> camera.R >> camera.t; } END_OUT_OF_PLACE_LOAD() #endif ================================================ FILE: toolkits/computer_vision/precomp.hpp ================================================ /*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. 
// // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #ifndef __OPENCV_STITCHING_PRECOMP_H__ #define __OPENCV_STITCHING_PRECOMP_H__ #ifdef HAVE_CVCONFIG_H #include "cvconfig.h" #endif #include "opencv2/opencv_modules.hpp" #include #include #include #include #include #include #include #include "opencv2/core/core.hpp" #include "opencv2/core/internal.hpp" #include "opencv2/stitching/stitcher.hpp" #include "opencv2/stitching/detail/autocalib.hpp" #include "opencv2/stitching/detail/blenders.hpp" #include "opencv2/stitching/detail/camera.hpp" #include "opencv2/stitching/detail/exposure_compensate.hpp" #include "opencv2/stitching/detail/matchers.hpp" #include "opencv2/stitching/detail/motion_estimators.hpp" #include "opencv2/stitching/detail/seam_finders.hpp" #include "opencv2/stitching/detail/util.hpp" #include "opencv2/stitching/detail/warpers.hpp" #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/features2d/features2d.hpp" #include "opencv2/calib3d/calib3d.hpp" #ifdef HAVE_OPENCV_GPU # include "opencv2/gpu/gpu.hpp" #endif //#include "../../imgproc/src/gcgraph.hpp" #include "gcgraph.hpp" #ifdef HAVE_TEGRA_OPTIMIZATION # include "opencv2/stitching/stitching_tegra.hpp" #endif #endif ================================================ 
FILE: toolkits/computer_vision/seam_finders_gr.hpp ================================================ /*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #ifndef __SEAM_FINDERS_GR_HPP__ #define __SEAM_FINDERS_GR_HPP__ #include "precomp.hpp" #include "opencv2/stitching/detail/seam_finders.hpp" namespace cv { namespace detail { void PairwiseSeamFinder::find(const vector &src, const vector &corners, vector &masks) { LOGLN("Finding seams..."); if (src.size() == 0) return; int64 t = getTickCount(); images_ = src; sizes_.resize(src.size()); for (size_t i = 0; i < src.size(); ++i) sizes_[i] = src[i].size(); corners_ = corners; masks_ = masks; run(); LOGLN("Finding seams, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); } void PairwiseSeamFinder::run() { for (size_t i = 0; i < sizes_.size() - 1; ++i) { for (size_t j = i + 1; j < sizes_.size(); ++j) { Rect roi; if (overlapRoi(corners_[i], corners_[j], sizes_[i], sizes_[j], roi)) findInPair(i, j, roi); } } } void VoronoiSeamFinder::find(const vector &sizes, const vector &corners, vector &masks) { LOGLN("Finding seams..."); if (sizes.size() == 0) return; int64 t = getTickCount(); sizes_ = sizes; corners_ = corners; masks_ = masks; run(); LOGLN("Finding seams, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); } void VoronoiSeamFinder::findInPair(size_t first, size_t second, Rect roi) { const int gap = 10; Mat submask1(roi.height + 2 * gap, roi.width + 2 * gap, CV_8U); Mat submask2(roi.height + 2 * gap, roi.width + 2 * gap, CV_8U); Size img1 = sizes_[first], img2 = sizes_[second]; Mat mask1 = masks_[first], 
mask2 = masks_[second]; Point tl1 = corners_[first], tl2 = corners_[second]; // Cut submasks with some gap for (int y = -gap; y < roi.height + gap; ++y) { for (int x = -gap; x < roi.width + gap; ++x) { int y1 = roi.y - tl1.y + y; int x1 = roi.x - tl1.x + x; if (y1 >= 0 && x1 >= 0 && y1 < img1.height && x1 < img1.width) submask1.at(y + gap, x + gap) = mask1.at(y1, x1); else submask1.at(y + gap, x + gap) = 0; int y2 = roi.y - tl2.y + y; int x2 = roi.x - tl2.x + x; if (y2 >= 0 && x2 >= 0 && y2 < img2.height && x2 < img2.width) submask2.at(y + gap, x + gap) = mask2.at(y2, x2); else submask2.at(y + gap, x + gap) = 0; } } Mat collision = (submask1 != 0) & (submask2 != 0); Mat unique1 = submask1.clone(); unique1.setTo(0, collision); Mat unique2 = submask2.clone(); unique2.setTo(0, collision); Mat dist1, dist2; distanceTransform(unique1 == 0, dist1, CV_DIST_L1, 3); distanceTransform(unique2 == 0, dist2, CV_DIST_L1, 3); Mat seam = dist1 < dist2; for (int y = 0; y < roi.height; ++y) { for (int x = 0; x < roi.width; ++x) { if (seam.at(y + gap, x + gap)) mask2.at(roi.y - tl2.y + y, roi.x - tl2.x + x) = 0; else mask1.at(roi.y - tl1.y + y, roi.x - tl1.x + x) = 0; } } } class GraphCutSeamFinder::Impl : public PairwiseSeamFinder { public: Impl(int cost_type, float terminal_cost, float bad_region_penalty) : cost_type_(cost_type), terminal_cost_(terminal_cost), bad_region_penalty_(bad_region_penalty) {} ~Impl() {} void find(const vector &src, const vector &corners, vector &masks); void findInPair(size_t first, size_t second, Rect roi); private: void setGraphWeightsColor(const Mat &img1, const Mat &img2, const Mat &mask1, const Mat &mask2, GCGraph &graph); void setGraphWeightsColorGrad(const Mat &img1, const Mat &img2, const Mat &dx1, const Mat &dx2, const Mat &dy1, const Mat &dy2, const Mat &mask1, const Mat &mask2, GCGraph &graph); vector dx_, dy_; int cost_type_; float terminal_cost_; float bad_region_penalty_; }; void GraphCutSeamFinder::Impl::find(const vector &src, const vector 
&corners, vector &masks) { // Compute gradients dx_.resize(src.size()); dy_.resize(src.size()); Mat dx, dy; for (size_t i = 0; i < src.size(); ++i) { CV_Assert(src[i].channels() == 3); Sobel(src[i], dx, CV_32F, 1, 0); Sobel(src[i], dy, CV_32F, 0, 1); dx_[i].create(src[i].size(), CV_32F); dy_[i].create(src[i].size(), CV_32F); for (int y = 0; y < src[i].rows; ++y) { const Point3f* dx_row = dx.ptr(y); const Point3f* dy_row = dy.ptr(y); float* dx_row_ = dx_[i].ptr(y); float* dy_row_ = dy_[i].ptr(y); for (int x = 0; x < src[i].cols; ++x) { dx_row_[x] = normL2(dx_row[x]); dy_row_[x] = normL2(dy_row[x]); } } } PairwiseSeamFinder::find(src, corners, masks); } void GraphCutSeamFinder::Impl::setGraphWeightsColor(const Mat &img1, const Mat &img2, const Mat &mask1, const Mat &mask2, GCGraph &graph) { const Size img_size = img1.size(); // Set terminal weights for (int y = 0; y < img_size.height; ++y) { for (int x = 0; x < img_size.width; ++x) { int v = graph.addVtx(); graph.addTermWeights(v, mask1.at(y, x) ? terminal_cost_ : 0.f, mask2.at(y, x) ? 
terminal_cost_ : 0.f); } } // Set regular edge weights const float weight_eps = 1.f; for (int y = 0; y < img_size.height; ++y) { for (int x = 0; x < img_size.width; ++x) { int v = y * img_size.width + x; if (x < img_size.width - 1) { float weight = normL2(img1.at(y, x), img2.at(y, x)) + normL2(img1.at(y, x + 1), img2.at(y, x + 1)) + weight_eps; if (!mask1.at(y, x) || !mask1.at(y, x + 1) || !mask2.at(y, x) || !mask2.at(y, x + 1)) weight += bad_region_penalty_; graph.addEdges(v, v + 1, weight, weight); } if (y < img_size.height - 1) { float weight = normL2(img1.at(y, x), img2.at(y, x)) + normL2(img1.at(y + 1, x), img2.at(y + 1, x)) + weight_eps; if (!mask1.at(y, x) || !mask1.at(y + 1, x) || !mask2.at(y, x) || !mask2.at(y + 1, x)) weight += bad_region_penalty_; graph.addEdges(v, v + img_size.width, weight, weight); } } } } void GraphCutSeamFinder::Impl::setGraphWeightsColorGrad( const Mat &img1, const Mat &img2, const Mat &dx1, const Mat &dx2, const Mat &dy1, const Mat &dy2, const Mat &mask1, const Mat &mask2, GCGraph &graph) { const Size img_size = img1.size(); // Set terminal weights for (int y = 0; y < img_size.height; ++y) { for (int x = 0; x < img_size.width; ++x) { int v = graph.addVtx(); graph.addTermWeights(v, mask1.at(y, x) ? terminal_cost_ : 0.f, mask2.at(y, x) ? 
terminal_cost_ : 0.f); } } // Set regular edge weights const float weight_eps = 1.f; for (int y = 0; y < img_size.height; ++y) { for (int x = 0; x < img_size.width; ++x) { int v = y * img_size.width + x; if (x < img_size.width - 1) { float grad = dx1.at(y, x) + dx1.at(y, x + 1) + dx2.at(y, x) + dx2.at(y, x + 1) + weight_eps; float weight = (normL2(img1.at(y, x), img2.at(y, x)) + normL2(img1.at(y, x + 1), img2.at(y, x + 1))) / grad + weight_eps; if (!mask1.at(y, x) || !mask1.at(y, x + 1) || !mask2.at(y, x) || !mask2.at(y, x + 1)) weight += bad_region_penalty_; graph.addEdges(v, v + 1, weight, weight); } if (y < img_size.height - 1) { float grad = dy1.at(y, x) + dy1.at(y + 1, x) + dy2.at(y, x) + dy2.at(y + 1, x) + weight_eps; float weight = (normL2(img1.at(y, x), img2.at(y, x)) + normL2(img1.at(y + 1, x), img2.at(y + 1, x))) / grad + weight_eps; if (!mask1.at(y, x) || !mask1.at(y + 1, x) || !mask2.at(y, x) || !mask2.at(y + 1, x)) weight += bad_region_penalty_; graph.addEdges(v, v + img_size.width, weight, weight); } } } } void GraphCutSeamFinder::Impl::findInPair(size_t first, size_t second, Rect roi) { Mat img1 = images_[first], img2 = images_[second]; Mat dx1 = dx_[first], dx2 = dx_[second]; Mat dy1 = dy_[first], dy2 = dy_[second]; Mat mask1 = masks_[first], mask2 = masks_[second]; Point tl1 = corners_[first], tl2 = corners_[second]; const int gap = 10; Mat subimg1(roi.height + 2 * gap, roi.width + 2 * gap, CV_32FC3); Mat subimg2(roi.height + 2 * gap, roi.width + 2 * gap, CV_32FC3); Mat submask1(roi.height + 2 * gap, roi.width + 2 * gap, CV_8U); Mat submask2(roi.height + 2 * gap, roi.width + 2 * gap, CV_8U); Mat subdx1(roi.height + 2 * gap, roi.width + 2 * gap, CV_32F); Mat subdy1(roi.height + 2 * gap, roi.width + 2 * gap, CV_32F); Mat subdx2(roi.height + 2 * gap, roi.width + 2 * gap, CV_32F); Mat subdy2(roi.height + 2 * gap, roi.width + 2 * gap, CV_32F); // Cut subimages and submasks with some gap for (int y = -gap; y < roi.height + gap; ++y) { for (int x = -gap; 
x < roi.width + gap; ++x) { int y1 = roi.y - tl1.y + y; int x1 = roi.x - tl1.x + x; if (y1 >= 0 && x1 >= 0 && y1 < img1.rows && x1 < img1.cols) { subimg1.at(y + gap, x + gap) = img1.at(y1, x1); submask1.at(y + gap, x + gap) = mask1.at(y1, x1); subdx1.at(y + gap, x + gap) = dx1.at(y1, x1); subdy1.at(y + gap, x + gap) = dy1.at(y1, x1); } else { subimg1.at(y + gap, x + gap) = Point3f(0, 0, 0); submask1.at(y + gap, x + gap) = 0; subdx1.at(y + gap, x + gap) = 0.f; subdy1.at(y + gap, x + gap) = 0.f; } int y2 = roi.y - tl2.y + y; int x2 = roi.x - tl2.x + x; if (y2 >= 0 && x2 >= 0 && y2 < img2.rows && x2 < img2.cols) { subimg2.at(y + gap, x + gap) = img2.at(y2, x2); submask2.at(y + gap, x + gap) = mask2.at(y2, x2); subdx2.at(y + gap, x + gap) = dx2.at(y2, x2); subdy2.at(y + gap, x + gap) = dy2.at(y2, x2); } else { subimg2.at(y + gap, x + gap) = Point3f(0, 0, 0); submask2.at(y + gap, x + gap) = 0; subdx2.at(y + gap, x + gap) = 0.f; subdy2.at(y + gap, x + gap) = 0.f; } } } const int vertex_count = (roi.height + 2 * gap) * (roi.width + 2 * gap); const int edge_count = (roi.height - 1 + 2 * gap) * (roi.width + 2 * gap) + (roi.width - 1 + 2 * gap) * (roi.height + 2 * gap); GCGraph graph(vertex_count, edge_count); switch (cost_type_) { case GraphCutSeamFinder::COST_COLOR: setGraphWeightsColor(subimg1, subimg2, submask1, submask2, graph); break; case GraphCutSeamFinder::COST_COLOR_GRAD: setGraphWeightsColorGrad(subimg1, subimg2, subdx1, subdx2, subdy1, subdy2, submask1, submask2, graph); break; default: CV_Error(CV_StsBadArg, "unsupported pixel similarity measure"); } graph.maxFlow(); for (int y = 0; y < roi.height; ++y) { for (int x = 0; x < roi.width; ++x) { if (graph.inSourceSegment((y + gap) * (roi.width + 2 * gap) + x + gap)) { if (mask1.at(roi.y - tl1.y + y, roi.x - tl1.x + x)) mask2.at(roi.y - tl2.y + y, roi.x - tl2.x + x) = 0; } else { if (mask2.at(roi.y - tl2.y + y, roi.x - tl2.x + x)) mask1.at(roi.y - tl1.y + y, roi.x - tl1.x + x) = 0; } } } } 
GraphCutSeamFinder::GraphCutSeamFinder(int cost_type, float terminal_cost, float bad_region_penalty) : impl_(new Impl(cost_type, terminal_cost, bad_region_penalty)) {} GraphCutSeamFinder::~GraphCutSeamFinder() {} void GraphCutSeamFinder::find(const vector &src, const vector &corners, vector &masks) { impl_->find(src, corners, masks); } #ifdef HAVE_OPENCV_GPU void GraphCutSeamFinderGpu::find(const vector &src, const vector &corners, vector &masks) { // Compute gradients dx_.resize(src.size()); dy_.resize(src.size()); Mat dx, dy; for (size_t i = 0; i < src.size(); ++i) { CV_Assert(src[i].channels() == 3); Sobel(src[i], dx, CV_32F, 1, 0); Sobel(src[i], dy, CV_32F, 0, 1); dx_[i].create(src[i].size(), CV_32F); dy_[i].create(src[i].size(), CV_32F); for (int y = 0; y < src[i].rows; ++y) { const Point3f* dx_row = dx.ptr(y); const Point3f* dy_row = dy.ptr(y); float* dx_row_ = dx_[i].ptr(y); float* dy_row_ = dy_[i].ptr(y); for (int x = 0; x < src[i].cols; ++x) { dx_row_[x] = normL2(dx_row[x]); dy_row_[x] = normL2(dy_row[x]); } } } PairwiseSeamFinder::find(src, corners, masks); } void GraphCutSeamFinderGpu::findInPair(size_t first, size_t second, Rect roi) { Mat img1 = images_[first], img2 = images_[second]; Mat dx1 = dx_[first], dx2 = dx_[second]; Mat dy1 = dy_[first], dy2 = dy_[second]; Mat mask1 = masks_[first], mask2 = masks_[second]; Point tl1 = corners_[first], tl2 = corners_[second]; const int gap = 10; Mat subimg1(roi.height + 2 * gap, roi.width + 2 * gap, CV_32FC3); Mat subimg2(roi.height + 2 * gap, roi.width + 2 * gap, CV_32FC3); Mat submask1(roi.height + 2 * gap, roi.width + 2 * gap, CV_8U); Mat submask2(roi.height + 2 * gap, roi.width + 2 * gap, CV_8U); Mat subdx1(roi.height + 2 * gap, roi.width + 2 * gap, CV_32F); Mat subdy1(roi.height + 2 * gap, roi.width + 2 * gap, CV_32F); Mat subdx2(roi.height + 2 * gap, roi.width + 2 * gap, CV_32F); Mat subdy2(roi.height + 2 * gap, roi.width + 2 * gap, CV_32F); // Cut subimages and submasks with some gap for (int y = -gap; y 
< roi.height + gap; ++y) { for (int x = -gap; x < roi.width + gap; ++x) { int y1 = roi.y - tl1.y + y; int x1 = roi.x - tl1.x + x; if (y1 >= 0 && x1 >= 0 && y1 < img1.rows && x1 < img1.cols) { subimg1.at(y + gap, x + gap) = img1.at(y1, x1); submask1.at(y + gap, x + gap) = mask1.at(y1, x1); subdx1.at(y + gap, x + gap) = dx1.at(y1, x1); subdy1.at(y + gap, x + gap) = dy1.at(y1, x1); } else { subimg1.at(y + gap, x + gap) = Point3f(0, 0, 0); submask1.at(y + gap, x + gap) = 0; subdx1.at(y + gap, x + gap) = 0.f; subdy1.at(y + gap, x + gap) = 0.f; } int y2 = roi.y - tl2.y + y; int x2 = roi.x - tl2.x + x; if (y2 >= 0 && x2 >= 0 && y2 < img2.rows && x2 < img2.cols) { subimg2.at(y + gap, x + gap) = img2.at(y2, x2); submask2.at(y + gap, x + gap) = mask2.at(y2, x2); subdx2.at(y + gap, x + gap) = dx2.at(y2, x2); subdy2.at(y + gap, x + gap) = dy2.at(y2, x2); } else { subimg2.at(y + gap, x + gap) = Point3f(0, 0, 0); submask2.at(y + gap, x + gap) = 0; subdx2.at(y + gap, x + gap) = 0.f; subdy2.at(y + gap, x + gap) = 0.f; } } } Mat terminals, leftT, rightT, top, bottom; switch (cost_type_) { case GraphCutSeamFinder::COST_COLOR: setGraphWeightsColor(subimg1, subimg2, submask1, submask2, terminals, leftT, rightT, top, bottom); break; case GraphCutSeamFinder::COST_COLOR_GRAD: setGraphWeightsColorGrad(subimg1, subimg2, subdx1, subdx2, subdy1, subdy2, submask1, submask2, terminals, leftT, rightT, top, bottom); break; default: CV_Error(CV_StsBadArg, "unsupported pixel similarity measure"); } gpu::GpuMat terminals_d(terminals); gpu::GpuMat leftT_d(leftT); gpu::GpuMat rightT_d(rightT); gpu::GpuMat top_d(top); gpu::GpuMat bottom_d(bottom); gpu::GpuMat labels_d, buf_d; gpu::graphcut(terminals_d, leftT_d, rightT_d, top_d, bottom_d, labels_d, buf_d); Mat_ labels = (Mat)labels_d; for (int y = 0; y < roi.height; ++y) { for (int x = 0; x < roi.width; ++x) { if (labels(y + gap, x + gap)) { if (mask1.at(roi.y - tl1.y + y, roi.x - tl1.x + x)) mask2.at(roi.y - tl2.y + y, roi.x - tl2.x + x) = 0; } else { 
if (mask2.at(roi.y - tl2.y + y, roi.x - tl2.x + x)) mask1.at(roi.y - tl1.y + y, roi.x - tl1.x + x) = 0; } } } } void GraphCutSeamFinderGpu::setGraphWeightsColor(const Mat &img1, const Mat &img2, const Mat &mask1, const Mat &mask2, Mat &terminals, Mat &leftT, Mat &rightT, Mat &top, Mat &bottom) { const Size img_size = img1.size(); terminals.create(img_size, CV_32S); leftT.create(Size(img_size.height, img_size.width), CV_32S); rightT.create(Size(img_size.height, img_size.width), CV_32S); top.create(img_size, CV_32S); bottom.create(img_size, CV_32S); Mat_ terminals_(terminals); Mat_ leftT_(leftT); Mat_ rightT_(rightT); Mat_ top_(top); Mat_ bottom_(bottom); // Set terminal weights for (int y = 0; y < img_size.height; ++y) { for (int x = 0; x < img_size.width; ++x) { float source = mask1.at(y, x) ? terminal_cost_ : 0.f; float sink = mask2.at(y, x) ? terminal_cost_ : 0.f; terminals_(y, x) = saturate_cast((source - sink) * 255.f); } } // Set regular edge weights const float weight_eps = 1.f; for (int y = 0; y < img_size.height; ++y) { for (int x = 0; x < img_size.width; ++x) { if (x > 0) { float weight = normL2(img1.at(y, x - 1), img2.at(y, x - 1)) + normL2(img1.at(y, x), img2.at(y, x)) + weight_eps; if (!mask1.at(y, x - 1) || !mask1.at(y, x) || !mask2.at(y, x - 1) || !mask2.at(y, x)) weight += bad_region_penalty_; leftT_(x, y) = saturate_cast(weight * 255.f); } else leftT_(x, y) = 0; if (x < img_size.width - 1) { float weight = normL2(img1.at(y, x), img2.at(y, x)) + normL2(img1.at(y, x + 1), img2.at(y, x + 1)) + weight_eps; if (!mask1.at(y, x) || !mask1.at(y, x + 1) || !mask2.at(y, x) || !mask2.at(y, x + 1)) weight += bad_region_penalty_; rightT_(x, y) = saturate_cast(weight * 255.f); } else rightT_(x, y) = 0; if (y > 0) { float weight = normL2(img1.at(y - 1, x), img2.at(y - 1, x)) + normL2(img1.at(y, x), img2.at(y, x)) + weight_eps; if (!mask1.at(y - 1, x) || !mask1.at(y, x) || !mask2.at(y - 1, x) || !mask2.at(y, x)) weight += bad_region_penalty_; top_(y, x) = 
saturate_cast(weight * 255.f); } else top_(y, x) = 0; if (y < img_size.height - 1) { float weight = normL2(img1.at(y, x), img2.at(y, x)) + normL2(img1.at(y + 1, x), img2.at(y + 1, x)) + weight_eps; if (!mask1.at(y, x) || !mask1.at(y + 1, x) || !mask2.at(y, x) || !mask2.at(y + 1, x)) weight += bad_region_penalty_; bottom_(y, x) = saturate_cast(weight * 255.f); } else bottom_(y, x) = 0; } } } void GraphCutSeamFinderGpu::setGraphWeightsColorGrad( const Mat &img1, const Mat &img2, const Mat &dx1, const Mat &dx2, const Mat &dy1, const Mat &dy2, const Mat &mask1, const Mat &mask2, Mat &terminals, Mat &leftT, Mat &rightT, Mat &top, Mat &bottom) { const Size img_size = img1.size(); terminals.create(img_size, CV_32S); leftT.create(Size(img_size.height, img_size.width), CV_32S); rightT.create(Size(img_size.height, img_size.width), CV_32S); top.create(img_size, CV_32S); bottom.create(img_size, CV_32S); Mat_ terminals_(terminals); Mat_ leftT_(leftT); Mat_ rightT_(rightT); Mat_ top_(top); Mat_ bottom_(bottom); // Set terminal weights for (int y = 0; y < img_size.height; ++y) { for (int x = 0; x < img_size.width; ++x) { float source = mask1.at(y, x) ? terminal_cost_ : 0.f; float sink = mask2.at(y, x) ? 
terminal_cost_ : 0.f; terminals_(y, x) = saturate_cast((source - sink) * 255.f); } } // Set regular edge weights const float weight_eps = 1.f; for (int y = 0; y < img_size.height; ++y) { for (int x = 0; x < img_size.width; ++x) { if (x > 0) { float grad = dx1.at(y, x - 1) + dx1.at(y, x) + dx2.at(y, x - 1) + dx2.at(y, x) + weight_eps; float weight = (normL2(img1.at(y, x - 1), img2.at(y, x - 1)) + normL2(img1.at(y, x), img2.at(y, x))) / grad + weight_eps; if (!mask1.at(y, x - 1) || !mask1.at(y, x) || !mask2.at(y, x - 1) || !mask2.at(y, x)) weight += bad_region_penalty_; leftT_(x, y) = saturate_cast(weight * 255.f); } else leftT_(x, y) = 0; if (x < img_size.width - 1) { float grad = dx1.at(y, x) + dx1.at(y, x + 1) + dx2.at(y, x) + dx2.at(y, x + 1) + weight_eps; float weight = (normL2(img1.at(y, x), img2.at(y, x)) + normL2(img1.at(y, x + 1), img2.at(y, x + 1))) / grad + weight_eps; if (!mask1.at(y, x) || !mask1.at(y, x + 1) || !mask2.at(y, x) || !mask2.at(y, x + 1)) weight += bad_region_penalty_; rightT_(x, y) = saturate_cast(weight * 255.f); } else rightT_(x, y) = 0; if (y > 0) { float grad = dy1.at(y - 1, x) + dy1.at(y, x) + dy2.at(y - 1, x) + dy2.at(y, x) + weight_eps; float weight = (normL2(img1.at(y - 1, x), img2.at(y - 1, x)) + normL2(img1.at(y, x), img2.at(y, x))) / grad + weight_eps; if (!mask1.at(y - 1, x) || !mask1.at(y, x) || !mask2.at(y - 1, x) || !mask2.at(y, x)) weight += bad_region_penalty_; top_(y, x) = saturate_cast(weight * 255.f); } else top_(y, x) = 0; if (y < img_size.height - 1) { float grad = dy1.at(y, x) + dy1.at(y + 1, x) + dy2.at(y, x) + dy2.at(y + 1, x) + weight_eps; float weight = (normL2(img1.at(y, x), img2.at(y, x)) + normL2(img1.at(y + 1, x), img2.at(y + 1, x))) / grad + weight_eps; if (!mask1.at(y, x) || !mask1.at(y + 1, x) || !mask2.at(y, x) || !mask2.at(y + 1, x)) weight += bad_region_penalty_; bottom_(y, x) = saturate_cast(weight * 255.f); } else bottom_(y, x) = 0; } } } #endif } // namespace detail } // namespace cv #endif 
================================================ FILE: toolkits/computer_vision/stitch_full_main.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * * \brief This file contains an example of graphlab used for stitching * multiple images into a panorama. The code is based on a example * stiching application in OpenCV. * * \author Dhruv Batra */ /* No adjacency list is externally given here for constructing the graph, * rather an adjacency list is created here for a fully connected graph*/ #include "stitch_main.hpp" Options opts; ///////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { int64 app_start_time = getTickCount(); /////////////////////////////////////////////////////// // Set up Graphlab global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); ///! 
Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; /////////////////////////////////////////////////////// // Set up OpenCV cv::setBreakOnError(true); /////////////////////////////////////////////////////// // Graphlab parse input const std::string description = "Image Stitching"; graphlab::command_line_options clopts(description); string img_dir; string graph_path; clopts.attach_option("img", img_dir, "The directory containing the images"); clopts.add_positional("img"); clopts.add_positional("graph"); clopts.attach_option("output", opts.output_dir, "The directory in which to save the output"); clopts.attach_option("verbose", opts.verbose, "Verbosity of Printing: 0 (default, no printing) or 1 (lots)."); clopts.attach_option("work_megapix", opts.work_megapix, "Resolution for image registration step. The default is 0.6 Mpx."); clopts.attach_option("engine", opts.exec_type, "The type of engine to use {async, sync}."); if(!clopts.parse(argc, argv)) { graphlab::mpi_tools::finalize(); return clopts.is_set("help")? EXIT_SUCCESS : EXIT_FAILURE; } if(img_dir.empty()) { logstream(LOG_ERROR) << "No image directory was provided." << std::endl; return EXIT_FAILURE; } if (opts.work_megapix > 10) { logstream(LOG_ERROR) << "Inappropriate value for work_megapix." 
<< std::endl; return EXIT_FAILURE; } // display settings dc.cout() << "ncpus: " << clopts.get_ncpus() << std::endl << "engine: " << opts.exec_type << std::endl << "scheduler: " << clopts.get_scheduler_type() << std::endl << "img_dir: " << img_dir << std::endl << "work_megapix: " << opts.work_megapix << std::endl << "verbose: " << opts.verbose << std::endl; /////////////////////////////////////////////////////// // Feature Graph graph_type graph_feat(dc, clopts); // load the graph graph_loader(dc, graph_feat, img_dir); graph_feat.finalize(); /////////////////////////////////////////////////////// // Graphlab Engine engine_type engine_feat(dc, graph_feat, opts.exec_type, clopts); /////////////////////////////////////////////////////// // Run Aggregator to find size of largest image engine_feat.add_vertex_aggregator("find_largest_img", find_largest_img, set_scales); engine_feat.aggregate_now("find_largest_img"); /////////////////////////////////////////////////////// // Computer features in parallel on vertices graph_feat.transform_vertices(compute_features); /////////////////////////////////////////////////////// // Match features in parallel on edges //graph_feat.transform_edges(match_features); // For now, features matching is done in serial manner. 
//if (dc.procid()==0) { /////////////////////////////////////////////////////// // Compile features typedef vector VecVD; VecVD vdlist = engine_feat.map_reduce_vertices(compile_vertices); vector features(vdlist.size()); for (size_t i=0; i!=vdlist.size(); ++i) { features[i] = vdlist[i].features; } int num_images = features.size(); //////////////////////////////////////////////////////////////////////////////////////////////////// LOG("Pairwise matching"); int64 t1 = getTickCount(); vector pairwise_matches; BestOf2NearestMatcher matcher(opts.try_gpu, opts.match_conf); matcher(features, pairwise_matches); matcher.collectGarbage(); LOGLN("\nPairwise matching, time: " << ((getTickCount() - t1) / getTickFrequency()) << " sec"); LOGLN("pairwise_matches.size() = " << pairwise_matches.size() << "\n"); /*for (size_t i=0; i!=pairwise_matches.size(); ++i) { LOGLN("src_img_idx = " << pairwise_matches[i].src_img_idx << "\t"); LOGLN("dst_img_idx = " << pairwise_matches[i].dst_img_idx << "\t"); LOGLN("matches.size() = " << pairwise_matches[i].matches.size() << "\t"); LOGLN("num_inliers = " << pairwise_matches[i].num_inliers << "\t"); LOGLN("confidence = " << pairwise_matches[i].confidence << "\n"); }*/ ////we can uncomment this part to match output with stitching_detailed similar way /////////////////////////////////////////////////////////////////// /* Compile matches typedef vector VecED; VecED edlist = engine_feat.map_reduce_edges(compile_edges); if ((opts.verbose > 0) & (dc.procid()==0)) logstream(LOG_EMPH) << "edlist.size() = " << edlist.size() << "\n"; */ /////////////////////////////////////////////////////////////////// //vector pairwise_matches(edlist.size()); //not needed for serial implementation int r,c; int pair_idx; for (size_t i=0; i!=pairwise_matches.size(); ++i) { IND2SUB_RM(i,r,c,num_images) if (r==c) continue; if (rc) // Swap & invert a few things in the match { if (!pairwise_matches[i].H.empty()) pairwise_matches[i].H = pairwise_matches[i].H.inv(); for (size_t 
j = 0; j < pairwise_matches[i].matches.size(); ++j) std::swap(pairwise_matches[i].matches[j].queryIdx, pairwise_matches[i].matches[j].trainIdx); } if ((opts.verbose > 0) & (dc.procid()==0)) logstream(LOG_EMPH) << "#Matches in Pair " "(" << pairwise_matches[i].src_img_idx << "," << pairwise_matches[i].dst_img_idx << ")" << ": (" << pairwise_matches[i].matches.size() << "," << pairwise_matches[i].num_inliers << "," << pairwise_matches[i].confidence << ")" << "\n"; } //edlist.clear();*/ not needed for serial implementation /////////////////////////////////////////////////////// // Leave only images we are sure are from the same panorama vector indices = leaveBiggestComponent(features, pairwise_matches, opts.conf_thresh); vector img_path(indices.size()); for (size_t i=0; i!=indices.size(); ++i) { img_path[i] = vdlist[indices[i]].img_path; } /////////////////////////////////////////////////////// // Homography-Based Initialization int64 t; t = getTickCount(); HomographyBasedEstimator estimator; vector cameras; estimator(features, pairwise_matches, cameras); logstream(LOG_EMPH) << "Homography-based init, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec\n"; LOGLN("Camera's size: " << cameras.size() << "\n"); //added for testing for (size_t i = 0; i < cameras.size(); ++i) { Mat R; cameras[i].R.convertTo(R, CV_32F); cameras[i].R = R; if (dc.procid() == 0) logstream(LOG_EMPH) << "Initial intrinsics #" << i << ":\n" << cameras[i].K() << "\n\n"; } LOGLN("Homography-Based Initialization ended...\n"); /////////////////////////////////////////////////////// // Bundle Adjustment t = getTickCount(); Ptr adjuster; if (opts.ba_cost_func == "reproj") adjuster = new detail::BundleAdjusterReproj(); else if (opts.ba_cost_func == "ray") adjuster = new detail::BundleAdjusterRay(); else { cout << "Unknown bundle adjustment cost function: '" << opts.ba_cost_func << "'.\n"; return -1; } adjuster->setConfThresh(opts.conf_thresh); Mat_ refine_mask = Mat::zeros(3, 3, CV_8U); if 
(opts.ba_refine_mask[0] == 'x') refine_mask(0,0) = 1; if (opts.ba_refine_mask[1] == 'x') refine_mask(0,1) = 1; if (opts.ba_refine_mask[2] == 'x') refine_mask(0,2) = 1; if (opts.ba_refine_mask[3] == 'x') refine_mask(1,1) = 1; if (opts.ba_refine_mask[4] == 'x') refine_mask(1,2) = 1; adjuster->setRefinementMask(refine_mask); //LOGLN("I will enter adjuster calculation now...\n"); (*adjuster)(features, pairwise_matches, cameras); if (dc.procid() == 0) logstream(LOG_EMPH) << "Bundle Adjustment, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec\n"; /////////////////////////////////////////////////////// // Find median focal length vector focals; for (size_t i = 0; i < cameras.size(); ++i) { if (dc.procid() == 0) logstream(LOG_EMPH) << "Camera #" << i << ":\n" << cameras[i].K() << "\n\n"; focals.push_back(cameras[i].focal); } sort(focals.begin(), focals.end()); if (opts.verbose > 2) LOGLN("Focals size: " << focals.size() << " focals: " << focals << "\n"); if (focals.size() % 2 == 1) opts.warped_image_scale = static_cast(focals[focals.size() / 2]); else opts.warped_image_scale = static_cast(focals[focals.size() / 2 - 1] + focals[focals.size() / 2]) * 0.5f; /////////////////////////////////////////////////////// // Wave-Correction vector rmats; WaveCorrectKind wave_correct; // in future it needs to be added in clopts to change it in runtime if (opts.wave_correct_type == "horiz") wave_correct = detail::WAVE_CORRECT_HORIZ; if (opts.wave_correct_type == "vert") wave_correct = detail::WAVE_CORRECT_VERT; for (size_t i = 0; i < cameras.size(); ++i) rmats.push_back(cameras[i].R); waveCorrect(rmats, wave_correct); for (size_t i = 0; i < cameras.size(); ++i) cameras[i].R = rmats[i]; //} // End of if procid=0 /////////////////////////////////////////////////////// // Create a second graph with cameras graph_type graph_cam(dc, clopts); // load the graph if (dc.procid()==0) graph_loader(graph_cam, img_dir, cameras, img_path, indices, pairwise_matches); 
graph_cam.finalize(); vdlist.clear(); /////////////////////////////////////////////////////// // Warp Images in parallel on vertices graph_cam.transform_vertices(warp_images); /////////////////////////////////////////////////////// // Gain Normalize /////////////////////////////////////////////////////// // Find seams in parallel on edges graph_cam.transform_edges(find_seams); /////////////////////////////////////////////////////// // Composite Images in parallel on vertices graph_cam.transform_vertices(composite_images); /////////////////////////////////////////////////////// // Second Graphlab Engine engine_type engine_cam(dc, graph_cam, opts.exec_type, clopts); /////////////////////////////////////////////////////// // blend images, gather vertices VecVD veclist = engine_cam.map_reduce_vertices(compile_vertices); vector corner(veclist.size()); vector img_warped(veclist.size()); vector mask_warped(veclist.size()); vector size(veclist.size()); Mat img_warped_s; for (size_t i=0; i!=veclist.size(); ++i) { corner[i] = veclist[i].corner; if (opts.verbose > 2) LOGLN("Blending corners x : " << veclist[i].corner.x << " y : " << veclist[i].corner.y << "\n"); img_warped[i] = veclist[i].img_warped; mask_warped[i] = veclist[i].mask_warped; size[i] = veclist[i].warp_size; if (opts.verbose > 2) LOGLN("Blending sizes height : " << size[i].height << " width : " << size[i].width << "\n"); } veclist.clear(); num_images = corner.size(); Ptr blender; int blend_type; if (opts.blending_type == "no") blend_type = Blender::NO; if (opts.blending_type == "feather") blend_type = Blender::FEATHER; if (opts.blending_type == "multiband") blend_type = Blender::MULTI_BAND; bool try_gpu = false; if (blender.empty()) { blender = Blender::createDefault(blend_type, try_gpu); Size dst_sz = resultRoi(corner, size).size(); float blend_width = sqrt(static_cast(dst_sz.area())) * opts.blend_strength / 100.f; if (blend_width < 1.f) blender = Blender::createDefault(Blender::NO, try_gpu); else if 
(blend_type == Blender::MULTI_BAND) { MultiBandBlender* mb = dynamic_cast(static_cast(blender)); mb->setNumBands(static_cast(ceil(log(blend_width)/log(2.)) - 1.)); LOGLN("Multi-band blender, number of bands: " << mb->numBands()); } else if (blend_type == Blender::FEATHER) { FeatherBlender* fb = dynamic_cast(static_cast(blender)); fb->setSharpness(1.f/blend_width); LOGLN("Feather blender, sharpness: " << fb->sharpness()); } blender->prepare(corner, size); } // Blend all images for (int j=0; j!=num_images; ++j) { img_warped[j].convertTo(img_warped_s, CV_16S); blender->feed(img_warped_s, mask_warped[j], corner[j]); img_warped_s.release(); } Mat result, result_mask, resized_result; blender->blend(result, result_mask); resize(result, resized_result, Size(), opts.output_scale, opts.output_scale); imwrite(opts.result_name, resized_result); // imwrite(opts.result_name, result); LOGLN("Finished, total time: " << ((getTickCount() - app_start_time) / getTickFrequency()) << " sec"); /////////////////////////////////////////////////////// // Run everything // engine.signal_all(); // graphlab::timer timer; // engine.start(); // const double runtime = timer.current_time(); // dc.cout() // << "----------------------------------------------------------" << std::endl // << "Final Runtime (seconds): " << runtime // << std::endl // << "Updates executed: " << engine.num_updates() << std::endl // << "Update Rate (updates/second): " // << engine.num_updates() / runtime << std::endl; } ================================================ FILE: toolkits/computer_vision/stitch_grlab.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * * \brief This file contains an example of graphlab used for stitching * multiple images into a panorama. The code is based on a example * stiching application in OpenCV. * * \author Dhruv Batra */ #ifndef __STITCH_GRLAB_HPP__ #define __STITCH_GRLAB_HPP__ #include #include #include #include #include #include #include #include #include #include "eigen_serialization.hpp" #include "opencv_serialization.hpp" #include "stitch_opts.hpp" #include "utils.hpp" //#include "seam_finders_gr.hpp" #include "precomp.hpp" #include #include "opencv2/opencv_modules.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/stitching/detail/autocalib.hpp" #include "opencv2/stitching/detail/blenders.hpp" #include "opencv2/stitching/detail/camera.hpp" #include "opencv2/stitching/detail/exposure_compensate.hpp" #include "opencv2/stitching/detail/matchers.hpp" #include "opencv2/stitching/detail/motion_estimators.hpp" #include "opencv2/stitching/detail/seam_finders.hpp" #include "opencv2/stitching/detail/util.hpp" #include "opencv2/stitching/detail/warpers.hpp" #include "opencv2/stitching/warpers.hpp" using namespace std; using namespace cv; using namespace cv::detail; typedef Eigen::VectorXd vec; typedef Eigen::MatrixXd mat; ///////////////////////////////////////////////////////////////////////// // Edge and Vertex data and Graph Type struct vertex_data { bool empty; // used to quickly check if this is a dummy vertex. 
// path to image std::string img_path; cv::Mat full_img; // Original image cv::Mat img; // Used for feature computation cv::Mat img_warped; // Used by gain compensator cv::Mat img_warped_f; // Used by seam_finder cv::Size full_img_size; cv:: Size warp_size; cv::detail::ImageFeatures features; cv::detail::CameraParams camera; cv::Point2f corner; //cv::Mat mask; cv::Mat mask_warped; // constructor vertex_data() : empty(true) { } void save(graphlab::oarchive& arc) const { arc << empty << img_path << full_img << img << img_warped << img_warped_f << full_img_size << warp_size << features << camera << corner << mask_warped; } void load(graphlab::iarchive& arc) { arc >> empty >> img_path >> full_img >> img >> img_warped >> img_warped_f >> full_img_size >> warp_size >> features >> camera >> corner >> mask_warped; } //>> mask vertex_data operator+ (vertex_data& othervertex) { vertex_data sum; if (!empty && !othervertex.empty) { logstream(LOG_ERROR) << "Don't know about to merge two non-empty vertex-data structures" << std::endl; //return EXIT_FAILURE; } else if (!empty && othervertex.empty) sum = *this; else if (empty && !othervertex.empty) sum = othervertex; // Nothing to do if both empty. return sum; } vertex_data& operator+= (const vertex_data& othervertex) { if (!empty && !othervertex.empty) { logstream(LOG_ERROR) << "Don't know about to merge two non-empty vertex-data structures" << std::endl; //return EXIT_FAILURE; } else if (empty && !othervertex.empty) *this = othervertex; // Nothing to do if both empty or othervertex empty. return *this; } }; // End of vertex data //typedef graphlab::empty edge_data; struct edge_data { bool empty; // used to quickly check if this is a dummy edge. 
cv::detail::MatchesInfo matchinfo; // constructor edge_data() : empty(true) { } void save(graphlab::oarchive& arc) const { arc << empty << matchinfo; } void load(graphlab::iarchive& arc) { arc >> empty >> matchinfo; } edge_data operator+ (edge_data& otheredge) { edge_data sum; if (!empty && !otheredge.empty) { logstream(LOG_ERROR) << "Don't know about to merge two non-empty edge-data structures" << std::endl; //return EXIT_FAILURE; } else if (!empty && otheredge.empty) sum = *this; else if (empty && !otheredge.empty) sum = otheredge; // Nothing to do if both empty. return sum; } edge_data& operator+= (const edge_data& otheredge) { if (!empty && !otheredge.empty) { logstream(LOG_ERROR) << "Don't know about to merge two non-empty edge-data structures" << std::endl; //return EXIT_FAILURE; } else if (empty && !otheredge.empty) *this = otheredge; // Nothing to do if both empty or othervertex empty. return *this; } }; // End of edge data /** * The graph type */ typedef graphlab::distributed_graph graph_type; ///////////////////////////////////////////////////////////////////////// // GraphLab Vertex Program (and Gather Type) /** * The type passed around during the gather phase */ //struct gather_type //{ // // gather_type& operator+=(const gather_type& other) // { // } // end of operator += // void save(graphlab::oarchive& arc) const // { // arc << delf_i << delf_j; // } // void load(graphlab::iarchive& arc) // { // arc >> delf_i >> delf_j; // } //}; // end of gather type typedef graphlab::empty gather_type; /** * The core stitching update function. 
*/ class stitch_vertex_program : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { private: public: stitch_vertex_program() { } edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges // Run the gather operation over all in edges gather_type gather(icontext_type& context, const vertex_type& target_vertex, edge_type& edge) const { return gather_type(); } // end of gather void apply(icontext_type& context, vertex_type& vertex, const gather_type& sum) { // Get vertex data vertex_data &vdata = vertex.data(); logstream(LOG_EMPH) << "Features in image #" << vertex.id() << ": " << vdata.features.keypoints.size() << "\n"; } // end of apply edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { //return graphlab::ALL_EDGES; return graphlab::NO_EDGES; }; // end of gather_edges }; /** * Define the engine type */ //typedef graphlab::synchronous_engine engine_type; //typedef graphlab::async_consistent_engine engine_type; typedef graphlab::omni_engine engine_type; #endif ================================================ FILE: toolkits/computer_vision/stitch_main.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * */ /** * * \brief This file contains an example of graphlab used for stitching * multiple images into a panorama. 
The code is based on a example * stiching application in OpenCV. * * \author Dhruv Batra */ #include "stitch_main.hpp" Options opts; ///////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { int64 app_start_time = getTickCount(); /////////////////////////////////////////////////////// // Set up Graphlab global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; /////////////////////////////////////////////////////// // Set up OpenCV cv::setBreakOnError(true); /////////////////////////////////////////////////////// // Graphlab parse input const std::string description = "Image Stitching"; graphlab::command_line_options clopts(description); string img_dir; string graph_path; clopts.attach_option("img", img_dir, "The directory containing the images"); clopts.add_positional("img"); clopts.attach_option("graph", graph_path, "The path to the adjacency list file (could be the prefix in case of multiple files)"); clopts.add_positional("graph"); clopts.attach_option("output", opts.output_dir, "The directory in which to save the output"); clopts.attach_option("verbose", opts.verbose, "Verbosity of Printing: 0 (default, no printing) or 1 (lots)."); clopts.attach_option("work_megapix", opts.work_megapix, "Resolution for image registration step. The default is 0.6 Mpx."); clopts.attach_option("engine", opts.exec_type, "The type of engine to use {async, sync}."); if(!clopts.parse(argc, argv)) { graphlab::mpi_tools::finalize(); return clopts.is_set("help")? EXIT_SUCCESS : EXIT_FAILURE; } if(img_dir.empty()) { logstream(LOG_ERROR) << "No image directory was provided." << std::endl; return EXIT_FAILURE; } if(graph_path.empty()) { logstream(LOG_ERROR) << "No adjacency file provided." 
<< std::endl; return EXIT_FAILURE; } if (opts.work_megapix > 10) { logstream(LOG_ERROR) << "Inappropriate value for work_megapix." << std::endl; return EXIT_FAILURE; } // display settings dc.cout() << "ncpus: " << clopts.get_ncpus() << std::endl << "engine: " << opts.exec_type << std::endl << "scheduler: " << clopts.get_scheduler_type() << std::endl << "img_dir: " << img_dir << std::endl << "graph_path: " << graph_path << std::endl << "work_megapix: " << opts.work_megapix << std::endl << "verbose: " << opts.verbose << std::endl; /////////////////////////////////////////////////////// // Feature Graph graph_type graph_feat(dc, clopts); // load the graph //graph.load(img_dir, vertex_loader); vertex_loader(dc, graph_feat, img_dir); graph_feat.load(graph_path, edge_loader); graph_feat.finalize(); /////////////////////////////////////////////////////// // Graphlab Engine engine_type engine_feat(dc, graph_feat, opts.exec_type, clopts); /////////////////////////////////////////////////////// // Run Aggregator to find size of largest image engine_feat.add_vertex_aggregator("find_largest_img", find_largest_img, set_scales); engine_feat.aggregate_now("find_largest_img"); /////////////////////////////////////////////////////// // Computer features in parallel on vertices graph_feat.transform_vertices(compute_features); /////////////////////////////////////////////////////// // Match features in parallel on edges graph_feat.transform_edges(match_features); //if (dc.procid()==0) { /////////////////////////////////////////////////////// // Compile features typedef vector VecVD; VecVD vdlist = engine_feat.map_reduce_vertices(compile_vertices); vector features(vdlist.size()); for (size_t i=0; i!=vdlist.size(); ++i) { features[i] = vdlist[i].features; } vdlist.clear(); int num_images = features.size(); /////////////////////////////////////////////////////// // Compile matches typedef vector VecED; VecED edlist = engine_feat.map_reduce_edges(compile_edges); if ((opts.verbose > 0) & 
(dc.procid()==0)) logstream(LOG_EMPH) << "edlist.size() = " << edlist.size() << "\n"; vector pairwise_matches(edlist.size()); int r,c; int pair_idx; for (size_t i=0; i!=edlist.size(); ++i) { IND2SUB_RM(i,r,c,num_images) if (r==c) continue; if (rc) // Swap & invert a few things in the match { if (!pairwise_matches[i].H.empty()) pairwise_matches[i].H = pairwise_matches[i].H.inv(); for (size_t j = 0; j < pairwise_matches[i].matches.size(); ++j) std::swap(pairwise_matches[i].matches[j].queryIdx, pairwise_matches[i].matches[j].trainIdx); } if ((opts.verbose > 0) & (dc.procid()==0)) logstream(LOG_EMPH) << "#Matches in Pair " "(" << pairwise_matches[i].src_img_idx << "," << pairwise_matches[i].dst_img_idx << ")" << ": (" << pairwise_matches[i].matches.size() << "," << pairwise_matches[i].num_inliers << "," << pairwise_matches[i].confidence << ")" << "\n"; } edlist.clear(); /////////////////////////////////////////////////////// // Homography-Based Initialization int64 t; t = getTickCount(); HomographyBasedEstimator estimator; vector cameras; estimator(features, pairwise_matches, cameras); logstream(LOG_EMPH) << "Homography-based init, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec\n"; for (size_t i = 0; i < cameras.size(); ++i) { Mat R; cameras[i].R.convertTo(R, CV_32F); cameras[i].R = R; if (dc.procid() == 0) logstream(LOG_EMPH) << "Initial intrinsics #" << i << ":\n" << cameras[i].K() << "\n\n"; } /////////////////////////////////////////////////////// // Bundle Adjustment t = getTickCount(); Ptr adjuster; //adjuster = new detail::BundleAdjusterRay(); if (opts.ba_cost_func == "reproj") adjuster = new detail::BundleAdjusterReproj(); else if (opts.ba_cost_func == "ray") adjuster = new detail::BundleAdjusterRay(); else { cout << "Unknown bundle adjustment cost function: '" << opts.ba_cost_func << "'.\n"; return -1; } adjuster->setConfThresh(opts.conf_thresh); Mat_ refine_mask = Mat::zeros(3, 3, CV_8U); if (opts.ba_refine_mask[0] == 'x') refine_mask(0,0) = 
1; if (opts.ba_refine_mask[1] == 'x') refine_mask(0,1) = 1; if (opts.ba_refine_mask[2] == 'x') refine_mask(0,2) = 1; if (opts.ba_refine_mask[3] == 'x') refine_mask(1,1) = 1; if (opts.ba_refine_mask[4] == 'x') refine_mask(1,2) = 1; adjuster->setRefinementMask(refine_mask); (*adjuster)(features, pairwise_matches, cameras); if (dc.procid() == 0) logstream(LOG_EMPH) << "Bundle Adjustment, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec\n"; /////////////////////////////////////////////////////// // Find median focal length vector focals; for (size_t i = 0; i < cameras.size(); ++i) { if (dc.procid() == 0) logstream(LOG_EMPH) << "Camera #" << i << ":\n" << cameras[i].K() << "\n\n"; focals.push_back(cameras[i].focal); } sort(focals.begin(), focals.end()); if (opts.verbose > 2) LOGLN("Focals size: " << focals.size() << " focals: " << focals << "\n"); if (focals.size() % 2 == 1) opts.warped_image_scale = static_cast(focals[focals.size() / 2]); else opts.warped_image_scale = static_cast(focals[focals.size() / 2 - 1] + focals[focals.size() / 2]) * 0.5f; /////////////////////////////////////////////////////// // Wave-Correction vector rmats; WaveCorrectKind wave_correct; // in future it needs to be added in clopts to change it in runtime if (opts.wave_correct_type == "horiz") wave_correct = detail::WAVE_CORRECT_HORIZ; if (opts.wave_correct_type == "vert") wave_correct = detail::WAVE_CORRECT_VERT; for (size_t i = 0; i < cameras.size(); ++i) rmats.push_back(cameras[i].R); waveCorrect(rmats, wave_correct); for (size_t i = 0; i < cameras.size(); ++i) cameras[i].R = rmats[i]; //} // End of if procid=0 /////////////////////////////////////////////////////// // Create a second graph with cameras graph_type graph_cam(dc, clopts); // load the graph if (dc.procid()==0) { vertex_loader(graph_cam, img_dir, cameras); graph_cam.load(graph_path, edge_loader); } graph_cam.finalize(); /////////////////////////////////////////////////////// // Warp Images in parallel on vertices 
graph_cam.transform_vertices(warp_images); /////////////////////////////////////////////////////// // Gain Normalize /////////////////////////////////////////////////////// // Find seams in parallel on edges graph_cam.transform_edges(find_seams); /////////////////////////////////////////////////////// // Composite Images in parallel on vertices graph_cam.transform_vertices(composite_images); /////////////////////////////////////////////////////// // Second Graphlab Engine engine_type engine_cam(dc, graph_cam, opts.exec_type, clopts); /////////////////////////////////////////////////////// // blend images, gather vertices VecVD veclist = engine_cam.map_reduce_vertices(compile_vertices); vector corner(veclist.size()); vector img_warped(veclist.size()); vector mask_warped(veclist.size()); vector size(veclist.size()); Mat img_warped_s; for (size_t i=0; i!=veclist.size(); ++i) { corner[i] = veclist[i].corner; if (opts.verbose > 2) LOGLN("Blending corners x : " << veclist[i].corner.x << " y : " << veclist[i].corner.y << "\n"); img_warped[i] = veclist[i].img_warped; mask_warped[i] = veclist[i].mask_warped; size[i] = veclist[i].warp_size; if (opts.verbose > 2) LOGLN("Blending sizes height : " << size[i].height << " width : " << size[i].width << "\n"); } veclist.clear(); num_images = corner.size(); Ptr blender; int blend_type; if (opts.blending_type == "no") blend_type = Blender::NO; if (opts.blending_type == "feather") blend_type = Blender::FEATHER; if (opts.blending_type == "multiband") blend_type = Blender::MULTI_BAND; bool try_gpu = false; if (blender.empty()) { blender = Blender::createDefault(blend_type, try_gpu); Size dst_sz = resultRoi(corner, size).size(); float blend_width = sqrt(static_cast(dst_sz.area())) * opts.blend_strength / 100.f; if (blend_width < 1.f) blender = Blender::createDefault(Blender::NO, try_gpu); else if (blend_type == Blender::MULTI_BAND) { MultiBandBlender* mb = dynamic_cast(static_cast(blender)); 
mb->setNumBands(static_cast(ceil(log(blend_width)/log(2.)) - 1.)); LOGLN("Multi-band blender, number of bands: " << mb->numBands()); } else if (blend_type == Blender::FEATHER) { FeatherBlender* fb = dynamic_cast(static_cast(blender)); fb->setSharpness(1.f/blend_width); LOGLN("Feather blender, sharpness: " << fb->sharpness()); } blender->prepare(corner, size); } // Blend all images for (int j=0; j!=num_images; ++j) { img_warped[j].convertTo(img_warped_s, CV_16S); blender->feed(img_warped_s, mask_warped[j], corner[j]); img_warped_s.release(); } Mat result, result_mask; blender->blend(result, result_mask); imwrite(opts.result_name, result); LOGLN("Finished, total time: " << ((getTickCount() - app_start_time) / getTickFrequency()) << " sec"); /////////////////////////////////////////////////////// // Run everything // engine.signal_all(); // graphlab::timer timer; // engine.start(); // const double runtime = timer.current_time(); // dc.cout() // << "----------------------------------------------------------" << std::endl // << "Final Runtime (seconds): " << runtime // << std::endl // << "Updates executed: " << engine.num_updates() << std::endl // << "Update Rate (updates/second): " // << engine.num_updates() / runtime << std::endl; } ================================================ FILE: toolkits/computer_vision/stitch_main.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. 
See the License for the specific language * governing permissions and limitations under the License. * * */ /** * * \brief This file contains an example of graphlab used for stitching * multiple images into a panorama. The code is based on a example * stiching application in OpenCV. * * \author Dhruv Batra */ #ifndef __STITCH_MAIN_HPP__ #define __STITCH_MAIN_HPP__ #include "utils.hpp" #include "stitch_grlab.hpp" #include "iostream" ///////////////////////////////////////////////////////////////////////// // Graph Loader (used to read images and load the vertex and edge data of // the graph. There is an edge between every pair of vertices in the graph, // i.e. the graph is fully connected. (Used in stitch_full_main) bool graph_loader(graphlab::distributed_control& dc, graph_type& graph, string img_dir) { // force a "/" at the end of the path // make sure to check that the path is non-empty. (you do not // want to make the empty path "" the root path "/" ) string path = img_dir; if (path.length() > 0 && path[path.length() - 1] != '/') path = path + "/"; vector graph_files; string search_prefix; graphlab::fs_util::list_files_with_prefix(path, search_prefix, graph_files); if (graph_files.size() == 0) logstream(LOG_WARNING) << "No files found in " << path << std::endl; if (opts.verbose > 2) logstream(LOG_EMPH) << "Total number of images: " << graph_files.size() << "\n"; // vertex data & id graphlab::vertex_id_type vid(-1); graphlab::vertex_id_type other_vid; /////////////////////////////////////////////////////// // Loop over files for(size_t i = 0; i < graph_files.size(); ++i) { // Each machine loads corresponding file if (i % dc.numprocs() == dc.procid()) { if (opts.verbose > 0) logstream(LOG_EMPH) << "Process: " << dc.procid() << "/" << dc.numprocs() << " " << "picked image: " << graph_files[i] << "\n"; vid = i; vertex_data vdata; vdata.empty = false; vdata.img_path = graph_files[i]; vdata.features.img_idx = i; graph.add_vertex(vid, vdata); if (opts.verbose > 2) 
logstream(LOG_EMPH) << "Vertex " << i << " Image: " << vdata.img_path << "\n"; } } // Adding edges between every pair of vertices to create a fully connected graph // no duplicate edges are added for(size_t i = 0; i < graph_files.size()-1; ++i) { vid = i; for(size_t j = i+1; j < graph_files.size(); ++j) { other_vid = j; if (opts.verbose > 0) logstream(LOG_EMPH) << "Adding edge: (" << vid << "," << other_vid << ")\n"; edge_data edata; edata.empty = false; graph.add_edge(vid,other_vid,edata); } } return true; } // Second Graph loader that only a single machine calls and pre-loads cameras. // It adds only the selected edges to the subgraph of the fully connected graph. // (Used in stitch_full_main) bool graph_loader(graph_type& graph, string img_dir, vector& cameras, vector& img_path, vector indices, vector& pairwise_matches) { // force a "/" at the end of the path // make sure to check that the path is non-empty. (you do not // want to make the empty path "" the root path "/" ) string path = img_dir; if (path.length() > 0 && path[path.length() - 1] != '/') path = path + "/"; // vertex data & id graphlab::vertex_id_type vid(-1); graphlab::vertex_id_type other_vid; if (opts.verbose > 2) logstream(LOG_EMPH) << "Total number of vertices in the second graph: " << indices.size() << "\n"; /////////////////////////////////////////////////////// // Loop over files for(size_t i = 0; i < indices.size(); ++i) { vid = i; vertex_data vdata; vdata.empty = false; vdata.img_path = img_path[i]; vdata.features.img_idx = i; vdata.camera = cameras[i]; // addition to above function. 
graph.add_vertex(vid, vdata); } // Adding edges between selected pair of vertices to create a subgraph of the fully connected graph // no duplicate edges are added if (opts.verbose > 2) logstream(LOG_EMPH) << "Pairwise_matches size : " << pairwise_matches.size() << "\n"; for(size_t i = 0; i < pairwise_matches.size(); ++i) { if (opts.verbose > 2) logstream(LOG_EMPH) << "Pairwise_matches : (" << pairwise_matches[i].src_img_idx << "," << pairwise_matches[i].dst_img_idx << ")\n"; if (pairwise_matches[i].src_img_idx >= 0 && pairwise_matches[i].dst_img_idx >= 0) { if (pairwise_matches[i].src_img_idx < pairwise_matches[i].dst_img_idx) // no duplicate edges are allowed { vid = pairwise_matches[i].src_img_idx; other_vid = pairwise_matches[i].dst_img_idx; if (opts.verbose > 0) logstream(LOG_EMPH) << "Adding edge: (" << vid << "," << other_vid << ")\n"; edge_data edata; edata.empty = false; graph.add_edge(vid,other_vid,edata); } } } return true; } ///////////////////////////////////////////////////////////////////////// // Vertex Loader (used to read images and load the vertex data of the graph) //bool vertex_loader(graph_type& graph, const std::string& fname, // const std::string& line) bool vertex_loader(graphlab::distributed_control& dc, graph_type& graph, string img_path) { // force a "/" at the end of the path // make sure to check that the path is non-empty. 
(you do not // want to make the empty path "" the root path "/" ) string path = img_path; if (path.length() > 0 && path[path.length() - 1] != '/') path = path + "/"; vector graph_files; string search_prefix; graphlab::fs_util::list_files_with_prefix(path, search_prefix, graph_files); if (graph_files.size() == 0) logstream(LOG_WARNING) << "No files found in " << path << std::endl; // vertex data & id graphlab::vertex_id_type vid(-1); /////////////////////////////////////////////////////// // Loop over files for(size_t i = 0; i < graph_files.size(); ++i) { // Each machine loads corresponding file if (i % dc.numprocs() == dc.procid()) { if (opts.verbose > 0) logstream(LOG_EMPH) << "Process: " << dc.procid() << "/" << dc.numprocs() << " " << "picked image: " << graph_files[i] << "\n"; vid = i; vertex_data vdata; vdata.empty = false; vdata.img_path = graph_files[i]; vdata.features.img_idx = i; graph.add_vertex(vid, vdata); } } return true; } // Second loader that only a single machine calls and pre-loads cameras. bool vertex_loader(graph_type& graph, string img_path, vector& cameras) { // force a "/" at the end of the path // make sure to check that the path is non-empty. (you do not // want to make the empty path "" the root path "/" ) string path = img_path; if (path.length() > 0 && path[path.length() - 1] != '/') path = path + "/"; vector graph_files; string search_prefix; graphlab::fs_util::list_files_with_prefix(path, search_prefix, graph_files); if (graph_files.size() == 0) logstream(LOG_WARNING) << "No files found in " << path << std::endl; // vertex data & id graphlab::vertex_id_type vid(-1); /////////////////////////////////////////////////////// // Loop over files for(size_t i = 0; i < graph_files.size(); ++i) { vid = i; vertex_data vdata; vdata.empty = false; vdata.img_path = graph_files[i]; vdata.features.img_idx = i; vdata.camera = cameras[i]; // addition to above function. 
graph.add_vertex(vid, vdata); } return true; } ///////////////////////////////////////////////////////////////////////// // Edge Loader (used to read the adjacency list and add edges to the graph) bool edge_loader(graph_type& graph, const std::string& fname, const std::string& textline) { if ( textline.length() == 0 || textline[0] == '#' ) return true; // empty or comment line, return std::stringstream strm(textline); graphlab::vertex_id_type vid; // first entry in the line is a vertex ID strm >> vid; if (opts.verbose > 0) logstream(LOG_EMPH) << "Here's the input: " << textline << "\n" << vid << "\n"; // Line should contain at least 1 more number (degree of node) if (!strm.good()) { logstream(LOG_ERROR) << "The following ajacency list line is incomplete(check adj_list standard):\n" << textline << std::endl; return EXIT_FAILURE; } // second entry is the out-degree int outdeg; strm >> outdeg; graphlab::vertex_id_type other_vid; for (int i=0; i!=outdeg; ++i) { // Line should contain more numbers (id of neighbours) if (!strm.good()) { logstream(LOG_ERROR) << "The following ajacency list line is incomplete(check adj_list standard):\n" << textline << std::endl; return EXIT_FAILURE; } strm >> other_vid; // only add edges in one direction if (other_vid < vid) continue; if (opts.verbose > 0) logstream(LOG_EMPH) << "Adding edge: (" << vid << "," << other_vid << ")\n"; edge_data edata; edata.empty = false; graph.add_edge(vid,other_vid,edata); } return true; } ///////////////////////////////////////////////////////////////////////// // Map-Aggregator function to find the largest image size struct ImgArea { double full_img_area; void save(graphlab::oarchive& arc) const { arc << full_img_area; } void load(graphlab::iarchive& arc) { arc >> full_img_area; } ImgArea operator+= (ImgArea other) // computes max { ImgArea max; max.full_img_area = (full_img_area > other.full_img_area) ? 
full_img_area : other.full_img_area; return max; } }; ImgArea find_largest_img(engine_type::icontext_type& context, graph_type::vertex_type& vertex) { // Get vertex data vertex_data &vdata = vertex.data(); // Ignore hdfs-setup for now. Just read from file directly. Mat full_img = imread(vdata.img_path); ImgArea imgarea; imgarea.full_img_area = full_img.size().area(); if (opts.verbose > 2) LOGLN("largest image area: " << imgarea.full_img_area << "\n"); return imgarea; } void set_scales(engine_type::icontext_type& context, ImgArea& largestimg) { if (opts.work_megapix > 0) opts.work_scale = min(1.0, sqrt(opts.work_megapix * 1e6 / largestimg.full_img_area)); if (opts.seam_megapix > 0) opts.seam_scale = min(1.0, sqrt(opts.seam_megapix * 1e6 / largestimg.full_img_area)); if (opts.compose_megapix > 0) opts.compose_scale = min(1.0, sqrt(opts.compose_megapix * 1e6 / largestimg.full_img_area)); if (opts.output_megapix > 0) opts.output_scale = min(1.0, sqrt(opts.output_megapix * 1e6 / largestimg.full_img_area)); opts.seam_work_aspect = opts.seam_scale / opts.work_scale; opts.compose_seam_aspect = opts.compose_scale / opts.seam_scale; opts.compose_work_aspect = opts.compose_scale / opts.work_scale; // opts.output_work_aspect = opts.output_scale / opts.work_scale; } ///////////////////////////////////////////////////////////////////////// // Function to extract features in vertex parallel void compute_features(graph_type::vertex_type& vertex) { // Get vertex data vertex_data &vdata = vertex.data(); // Load image // // open the stream // std::ifstream in_file(graph_files[i].c_str(), // std::ios_base::in | std::ios_base::binary); // // boost::iostreams::filtering_stream fin; // fin.push(in_file); // // // Get data from stream into a buffer // fin.pop(); // Ignore the above hdfs-setup for now. Just read from file directly. 
Mat &full_img = vdata.full_img; Mat &img = vdata.img; full_img = imread(vdata.img_path); if ( abs(opts.work_scale-1) > 1e-3 ) resize(full_img, img, Size(), opts.work_scale, opts.work_scale); else img = full_img; if (img.empty()) logstream(LOG_ERROR) << "Could not imread image: " << vdata.img_path << "\n"; // compute features SurfFeaturesFinder finder; finder(img, vdata.features); finder.collectGarbage(); if (opts.verbose > 0) { logstream(LOG_EMPH) << "Features in image #" << vertex.id() << ": " << vdata.features.keypoints.size() << "\n"; LOGLN("Size of feature image #" << img.cols << " " << img.rows); // } } ///////////////////////////////////////////////////////////////////////// // Function to compute feature-matches in parallel on edges void match_features(graph_type::edge_type& edge) { // Get edge data edge_data &edata = edge.data(); // Get vertex ids of two vertices involved vertex_data &vdata1 = edge.source().data(); vertex_data &vdata2 = edge.target().data(); // Match features BestOf2NearestMatcher matcher; matcher(vdata1.features, vdata2.features, edata.matchinfo); matcher.collectGarbage(); if (opts.verbose > 0) logstream(LOG_EMPH) << "#Matches in Image Pair " "(" << edge.source().id() << "," << edge.target().id() << ")" << ": (" << edata.matchinfo.matches.size() << "," << edata.matchinfo.num_inliers << ")" << "\n"; } ///////////////////////////////////////////////////////////////////////// // Function to warp images in parallel void warp_images(graph_type::vertex_type& vertex) { // Get vertex data vertex_data &vdata = vertex.data(); Mat full_img = imread(vdata.img_path); Mat &img = vdata.img; Mat &img_warped = vdata.img_warped; Mat &img_warped_f = vdata.img_warped_f; //Mat &mask = vdata.mask; Mat mask; Mat &mask_warped = vdata.mask_warped; CameraParams &camera = vdata.camera; Point2f &corner = vdata.corner; Size &size = vdata.warp_size; if (full_img.empty()) logstream(LOG_ERROR) << "Could not imread image: " << vdata.img_path << "\n"; vdata.full_img_size = 
full_img.size(); // Scale image if necessary double seam_scale = min(1.0, sqrt(opts.seam_megapix * 1e6 / full_img.size().area())); if ( abs(seam_scale-1) > 1e-3 ) resize(full_img, img, Size(), seam_scale, seam_scale); else img = full_img.clone(); // Prepare images mask mask.create(img.size(), CV_8U); mask.setTo(Scalar::all(255)); if (opts.verbose > 2) LOGLN("Size of mask image #" << img.cols << " " << img.rows); // // Warp images and their masks Ptr warper_creator; if (opts.warp_type == "plane") warper_creator = new cv::PlaneWarper(); else if (opts.warp_type == "cylindrical") warper_creator = new cv::CylindricalWarper(); else if (opts.warp_type == "spherical") warper_creator = new cv::SphericalWarper(); else if (opts.warp_type == "fisheye") warper_creator = new cv::FisheyeWarper(); else if (opts.warp_type == "stereographic") warper_creator = new cv::StereographicWarper(); else if (opts.warp_type == "compressedPlaneA2B1") warper_creator = new cv::CompressedRectilinearWarper(2, 1); else if (opts.warp_type == "compressedPlaneA1.5B1") warper_creator = new cv::CompressedRectilinearWarper(1.5, 1); else if (opts.warp_type == "compressedPlanePortraitA2B1") warper_creator = new cv::CompressedRectilinearPortraitWarper(2, 1); else if (opts.warp_type == "compressedPlanePortraitA1.5B1") warper_creator = new cv::CompressedRectilinearPortraitWarper(1.5, 1); else if (opts.warp_type == "paniniA2B1") warper_creator = new cv::PaniniWarper(2, 1); else if (opts.warp_type == "paniniA1.5B1") warper_creator = new cv::PaniniWarper(1.5, 1); else if (opts.warp_type == "paniniPortraitA2B1") warper_creator = new cv::PaniniPortraitWarper(2, 1); else if (opts.warp_type == "paniniPortraitA1.5B1") warper_creator = new cv::PaniniPortraitWarper(1.5, 1); else if (opts.warp_type == "mercator") warper_creator = new cv::MercatorWarper(); else if (opts.warp_type == "transverseMercator") warper_creator = new cv::TransverseMercatorWarper(); if (warper_creator.empty()) logstream(LOG_ERROR) << "Can't create 
the following warper '" << opts.warp_type << "'\n"; Ptr warper = warper_creator->create(static_cast(opts.warped_image_scale * opts.seam_work_aspect)); Mat_ K; camera.K().convertTo(K, CV_32F); float swa = (float)opts.seam_work_aspect; K(0,0) *= swa; K(0,2) *= swa; K(1,1) *= swa; K(1,2) *= swa; corner = warper->warp(img, K, camera.R, INTER_LINEAR, BORDER_REFLECT, img_warped); if (opts.verbose > 2) LOGLN("Warp corners x : " << corner.x << " y : " << corner.y << "\n"); size = img_warped.size(); if (opts.verbose > 2) LOGLN("Warp sizes height : " << size.height << " width : " << size.width << "\n"); warper->warp(mask, K, camera.R, INTER_NEAREST, BORDER_CONSTANT, mask_warped); img_warped.convertTo(img_warped_f, CV_32F); // If no gain compensator, then clear. img_warped.release(); } ///////////////////////////////////////////////////////////////////////// // Function to composite images in parallel void composite_images(graph_type::vertex_type& vertex) { // Get vertex data vertex_data &vdata = vertex.data(); CameraParams &camera = vdata.camera; Point2f &corner = vdata.corner; Mat full_img = imread(vdata.img_path); //we have to check it later for speed Mat &img_warped = vdata.img_warped; //added by me Mat &mask_warped = vdata.mask_warped; //added by me Size &size = vdata.warp_size; //added by me Mat mask, dilated_mask, seam_mask, masks_warped; //added by me if (full_img.empty()) logstream(LOG_ERROR) << "Could not imread image: " << vdata.img_path << "\n"; Mat &img = vdata.img; // Update warped image scale Ptr warper_creator; if (opts.warp_type == "plane") warper_creator = new cv::PlaneWarper(); else if (opts.warp_type == "cylindrical") warper_creator = new cv::CylindricalWarper(); else if (opts.warp_type == "spherical") warper_creator = new cv::SphericalWarper(); else if (opts.warp_type == "fisheye") warper_creator = new cv::FisheyeWarper(); else if (opts.warp_type == "stereographic") warper_creator = new cv::StereographicWarper(); else if (opts.warp_type == 
"compressedPlaneA2B1") warper_creator = new cv::CompressedRectilinearWarper(2, 1); else if (opts.warp_type == "compressedPlaneA1.5B1") warper_creator = new cv::CompressedRectilinearWarper(1.5, 1); else if (opts.warp_type == "compressedPlanePortraitA2B1") warper_creator = new cv::CompressedRectilinearPortraitWarper(2, 1); else if (opts.warp_type == "compressedPlanePortraitA1.5B1") warper_creator = new cv::CompressedRectilinearPortraitWarper(1.5, 1); else if (opts.warp_type == "paniniA2B1") warper_creator = new cv::PaniniWarper(2, 1); else if (opts.warp_type == "paniniA1.5B1") warper_creator = new cv::PaniniWarper(1.5, 1); else if (opts.warp_type == "paniniPortraitA2B1") warper_creator = new cv::PaniniPortraitWarper(2, 1); else if (opts.warp_type == "paniniPortraitA1.5B1") warper_creator = new cv::PaniniPortraitWarper(1.5, 1); else if (opts.warp_type == "mercator") warper_creator = new cv::MercatorWarper(); else if (opts.warp_type == "transverseMercator") warper_creator = new cv::TransverseMercatorWarper(); if (warper_creator.empty()) logstream(LOG_ERROR) << "Can't create the following warper '" << opts.warp_type << "'\n"; Ptr warper = warper_creator->create(static_cast(opts.warped_image_scale * opts.compose_work_aspect)); // Update intrinsics camera.focal *= opts.compose_work_aspect; camera.ppx *= opts.compose_work_aspect; camera.ppy *= opts.compose_work_aspect; // Update corner and size //vdata.full_img_size = full_img.size(); Size sz = vdata.full_img_size; if (std::abs(opts.compose_scale - 1) > 1e-1) { sz.width = cvRound(vdata.full_img_size.width * opts.compose_scale); sz.height = cvRound(vdata.full_img_size.height * opts.compose_scale); } Mat K; camera.K().convertTo(K, CV_32F); Rect roi = warper->warpRoi(sz, K, camera.R); corner = roi.tl(); if (opts.verbose > 2) LOGLN("Compose corner x : " << corner.x << " y : " << corner.y << "\n"); size = roi.size(); if (opts.verbose > 2) LOGLN("Compose size height : " << size.height << " width : " << size.width << "\n"); if 
(abs(opts.compose_scale - 1) > 1e-1) resize(full_img, img, Size(), opts.compose_scale, opts.compose_scale); else img = full_img; Size img_size = img.size(); // Warp the current image warper->warp(img, K, camera.R, INTER_LINEAR, BORDER_REFLECT, img_warped); // Warp the current image mask mask.create(img_size, CV_8U); mask.setTo(Scalar::all(255)); warper->warp(mask, K, camera.R, INTER_NEAREST, BORDER_CONSTANT, masks_warped); // Compensate exposure //compensator->apply(img_idx, corner[img_idx], img_warped, mask_warped); img.release(); mask.release(); dilate(mask_warped, dilated_mask, Mat()); resize(dilated_mask, seam_mask, masks_warped.size()); mask_warped = seam_mask & masks_warped; } ///////////////////////////////////////////////////////////////////////// // Function to compute feature-matches in parallel on edges void find_seams(graph_type::edge_type& edge) { // Get edge data //edge_data &edata = edge.data(); //commented by me as it was unused // Get vertex ids of two vertices involved vertex_data &vdata1 = edge.source().data(); vertex_data &vdata2 = edge.target().data(); // Not sure why this is needed anymore? 
//Ptr seam_finder; //seam_finder = new detail::GraphCutSeamFinder(GraphCutSeamFinderBase::COST_COLOR); // Code from PairwiseSeamFinder::Impl::findInPair() //Mat img1 = images_[first], img2 = images_[second]; Mat &img1 = vdata1.img_warped_f; Mat &img2 = vdata2.img_warped_f; vector src; src.push_back(img1); src.push_back(img2); vector dx_(2), dy_(2); Mat dx, dy; for (size_t i = 0; i < src.size(); ++i) { CV_Assert(src[i].channels() == 3); Sobel(src[i], dx, CV_32F, 1, 0); Sobel(src[i], dy, CV_32F, 0, 1); dx_[i].create(src[i].size(), CV_32F); dy_[i].create(src[i].size(), CV_32F); for (int y = 0; y < src[i].rows; ++y) { const Point3f* dx_row = dx.ptr(y); const Point3f* dy_row = dy.ptr(y); float* dx_row_ = dx_[i].ptr(y); float* dy_row_ = dy_[i].ptr(y); for (int x = 0; x < src[i].cols; ++x) { dx_row_[x] = normL2(dx_row[x]); dy_row_[x] = normL2(dy_row[x]); } } } //Mat dx1 = dx_[first], dx2 = dx_[second]; //Mat dy1 = dy_[first], dy2 = dy_[second]; Mat &dx1 = dx_[0]; Mat &dx2 = dx_[1]; Mat &dy1 = dy_[0]; Mat &dy2 = dy_[1]; //Mat mask1 = masks_[first], mask2 = masks_[second]; Mat &mask1 = vdata1.mask_warped; Mat &mask2 = vdata2.mask_warped; //Point tl1 = corners_[first], tl2 = corners_[second]; Point2f &tl1 = vdata1.corner; Point2f &tl2 = vdata2.corner; Rect roi; overlapRoi(tl1, tl2, img1.size(), img2.size(), roi); const int gap = 10; Mat subimg1(roi.height + 2 * gap, roi.width + 2 * gap, CV_32FC3); Mat subimg2(roi.height + 2 * gap, roi.width + 2 * gap, CV_32FC3); Mat submask1(roi.height + 2 * gap, roi.width + 2 * gap, CV_8U); Mat submask2(roi.height + 2 * gap, roi.width + 2 * gap, CV_8U); Mat subdx1(roi.height + 2 * gap, roi.width + 2 * gap, CV_32F); Mat subdy1(roi.height + 2 * gap, roi.width + 2 * gap, CV_32F); Mat subdx2(roi.height + 2 * gap, roi.width + 2 * gap, CV_32F); Mat subdy2(roi.height + 2 * gap, roi.width + 2 * gap, CV_32F); // Cut subimages and submasks with some gap for (int y = -gap; y < roi.height + gap; ++y) { for (int x = -gap; x < roi.width + gap; ++x) { int 
y1 = roi.y - tl1.y + y; int x1 = roi.x - tl1.x + x; if (y1 >= 0 && x1 >= 0 && y1 < img1.rows && x1 < img1.cols) { subimg1.at(y + gap, x + gap) = img1.at(y1, x1); submask1.at(y + gap, x + gap) = mask1.at(y1, x1); subdx1.at(y + gap, x + gap) = dx1.at(y1, x1); subdy1.at(y + gap, x + gap) = dy1.at(y1, x1); } else { subimg1.at(y + gap, x + gap) = Point3f(0, 0, 0); submask1.at(y + gap, x + gap) = 0; subdx1.at(y + gap, x + gap) = 0.f; subdy1.at(y + gap, x + gap) = 0.f; } int y2 = roi.y - tl2.y + y; int x2 = roi.x - tl2.x + x; if (y2 >= 0 && x2 >= 0 && y2 < img2.rows && x2 < img2.cols) { subimg2.at(y + gap, x + gap) = img2.at(y2, x2); submask2.at(y + gap, x + gap) = mask2.at(y2, x2); subdx2.at(y + gap, x + gap) = dx2.at(y2, x2); subdy2.at(y + gap, x + gap) = dy2.at(y2, x2); } else { subimg2.at(y + gap, x + gap) = Point3f(0, 0, 0); submask2.at(y + gap, x + gap) = 0; subdx2.at(y + gap, x + gap) = 0.f; subdy2.at(y + gap, x + gap) = 0.f; } } } const int vertex_count = (roi.height + 2 * gap) * (roi.width + 2 * gap); const int edge_count = (roi.height - 1 + 2 * gap) * (roi.width + 2 * gap) + (roi.width - 1 + 2 * gap) * (roi.height + 2 * gap); GCGraph graph(vertex_count, edge_count); const Size img_size = subimg1.size(); if (opts.seam_find_type.compare("gc_color") ==0) { // Set terminal weights for (int y = 0; y < img_size.height; ++y) { for (int x = 0; x < img_size.width; ++x) { int v = graph.addVtx(); graph.addTermWeights(v, submask1.at(y, x) ? opts.terminal_cost : 0.f, submask2.at(y, x) ? 
opts.terminal_cost : 0.f); } } // Set regular edge weights const float weight_eps = 1.f; for (int y = 0; y < img_size.height; ++y) { for (int x = 0; x < img_size.width; ++x) { int v = y * img_size.width + x; if (x < img_size.width - 1) { float weight = normL2(subimg1.at(y, x), subimg2.at(y, x)) + normL2(subimg1.at(y, x + 1), subimg2.at(y, x + 1)) + weight_eps; if (!submask1.at(y, x) || !submask1.at(y, x + 1) || !submask2.at(y, x) || !submask2.at(y, x + 1)) weight += opts.bad_region_penalty; graph.addEdges(v, v + 1, weight, weight); } if (y < img_size.height - 1) { float weight = normL2(subimg1.at(y, x), subimg2.at(y, x)) + normL2(subimg1.at(y + 1, x), subimg2.at(y + 1, x)) + weight_eps; if (!submask1.at(y, x) || !submask1.at(y + 1, x) || !submask2.at(y, x) || !submask2.at(y + 1, x)) weight += opts.bad_region_penalty; graph.addEdges(v, v + img_size.width, weight, weight); } } } } else if (opts.seam_find_type.compare("gc_colorgrad") ==0) { // Set terminal weights for (int y = 0; y < img_size.height; ++y) { for (int x = 0; x < img_size.width; ++x) { int v = graph.addVtx(); graph.addTermWeights(v, submask1.at(y, x) ? opts.terminal_cost : 0.f, submask2.at(y, x) ? 
opts.terminal_cost : 0.f); } } // Set regular edge weights const float weight_eps = 1.f; for (int y = 0; y < img_size.height; ++y) { for (int x = 0; x < img_size.width; ++x) { int v = y * img_size.width + x; if (x < img_size.width - 1) { float grad = subdx1.at(y, x) + subdx1.at(y, x + 1) + subdx2.at(y, x) + subdx2.at(y, x + 1) + weight_eps; float weight = (normL2(subimg1.at(y, x), subimg2.at(y, x)) + normL2(subimg1.at(y, x + 1), subimg2.at(y, x + 1))) / grad + weight_eps; if (!submask1.at(y, x) || !submask1.at(y, x + 1) || !submask2.at(y, x) || !submask2.at(y, x + 1)) weight += opts.bad_region_penalty; graph.addEdges(v, v + 1, weight, weight); } if (y < img_size.height - 1) { float grad = subdy1.at(y, x) + subdy1.at(y + 1, x) + subdy2.at(y, x) + subdy2.at(y + 1, x) + weight_eps; float weight = (normL2(subimg1.at(y, x), subimg2.at(y, x)) + normL2(subimg1.at(y + 1, x), subimg2.at(y + 1, x))) / grad + weight_eps; if (!submask1.at(y, x) || !submask1.at(y + 1, x) || !submask2.at(y, x) || !submask2.at(y + 1, x)) weight += opts.bad_region_penalty; graph.addEdges(v, v + img_size.width, weight, weight); } } } } else CV_Error(CV_StsBadArg, "unsupported pixel similarity measure"); graph.maxFlow(); for (int y = 0; y < roi.height; ++y) { for (int x = 0; x < roi.width; ++x) { if (graph.inSourceSegment((y + gap) * (roi.width + 2 * gap) + x + gap)) { if (mask1.at(roi.y - tl1.y + y, roi.x - tl1.x + x)) mask2.at(roi.y - tl2.y + y, roi.x - tl2.x + x) = 0; } else { if (mask2.at(roi.y - tl2.y + y, roi.x - tl2.x + x)) mask1.at(roi.y - tl1.y + y, roi.x - tl1.x + x) = 0; } } } } ///////////////////////////////////////////////////////////////////////// // Map Function to compile a list of features //vector compile_features(const graph_type::vertex_type& vertex) vector compile_vertices(engine_type::icontext_type& context, const graph_type::vertex_type& vertex) { vector temp(context.num_vertices()); temp[vertex.id()] = vertex.data(); return temp; } 
///////////////////////////////////////////////////////////////////////// // Map Function to compile a list of matches //vector compile_features(const graph_type::vertex_type& vertex) vector compile_edges(engine_type::icontext_type& context, const graph_type::edge_type& edge) { int edlist_len = context.num_vertices() * context.num_vertices(); vector temp(edlist_len); int pair_idx = edge.source().id() * context.num_vertices() + edge.target().id(); temp[pair_idx] = edge.data(); return temp; } #endif ================================================ FILE: toolkits/computer_vision/stitch_opts.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * * \brief This file contains an example of graphlab used for stitching * multiple images into a panorama. The code is based on a example * stiching application in OpenCV. 
* * \author Dhruv Batra */ #ifndef __STITCH_OPTS_HPP__ #define __STITCH_OPTS_HPP__ #include #include "opencv2/opencv_modules.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/stitching/detail/autocalib.hpp" #include "opencv2/stitching/detail/blenders.hpp" #include "opencv2/stitching/detail/camera.hpp" #include "opencv2/stitching/detail/exposure_compensate.hpp" #include "opencv2/stitching/detail/matchers.hpp" #include "opencv2/stitching/detail/motion_estimators.hpp" #include "opencv2/stitching/detail/seam_finders.hpp" #include "opencv2/stitching/detail/util.hpp" #include "opencv2/stitching/detail/warpers.hpp" #include "opencv2/stitching/warpers.hpp" ///////////////////////////////////////////////////////////////////////// // Option Struct struct Options { // graphlab options std::string exec_type; // input output dirs std::string output_dir; int verbose; bool try_gpu; // size of images double work_megapix; double seam_megapix; double compose_megapix; double output_megapix; double work_scale; double seam_scale; double compose_scale; double output_scale; double seam_work_aspect; double compose_seam_aspect; double compose_work_aspect; // double output_work_aspect; double warped_image_scale; std::string warp_type; // match options double conf_thresh; float match_conf; // seam options std::string seam_find_type; float terminal_cost; float bad_region_penalty; //wave correction options std::string wave_correct_type; //bundle adjustment options std::string ba_cost_func; std::string ba_refine_mask; //gain compensation options std::string expose_comp_type; //blending options std::string blending_type; float blend_strength; //saving output std::string result_name; //saving the adjacency list for creating the graph //std::string graph_name; // Default values Options(): exec_type("async"), output_dir("./"), verbose(0), try_gpu(false), work_megapix(0.6), seam_megapix(0.1), compose_megapix(-1), output_megapix(0.6), work_scale(1), seam_scale(1), compose_scale(1), 
output_scale(1), seam_work_aspect(1/6), compose_seam_aspect(1), compose_work_aspect(1), warped_image_scale(-1), warp_type("spherical"), conf_thresh(1.f), match_conf(0.3f), seam_find_type("gc_color"), terminal_cost(10000.f), bad_region_penalty(1000.f), wave_correct_type("horiz"), ba_cost_func("ray"), ba_refine_mask("xxxxx"), expose_comp_type("gain_blocks"), blending_type("multiband"), blend_strength(5), result_name("result_stitch.jpg") {} }; // output_megapix(1), output_scale(1), extern Options opts; #endif ================================================ FILE: toolkits/computer_vision/stitching.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #include #include #include "opencv2/highgui/highgui.hpp" #include "opencv2/stitching/stitcher.hpp" using namespace std; using namespace cv; bool try_use_gpu = false; vector imgs; string result_name = "result.png"; void printUsage(); int parseCmdArgs(int argc, char** argv); int main(int argc, char* argv[]) { int retval = parseCmdArgs(argc, argv); if (retval) return -1; Mat pano; Stitcher stitcher = Stitcher::createDefault(try_use_gpu); Stitcher::Status status = stitcher.stitch(imgs, pano); if (status != Stitcher::OK) { cout << "Can't stitch images, error code = " << status << endl; return -1; } imwrite(result_name, pano); return 0; } void printUsage() { cout << "Rotation model images stitcher.\n\n" "stitching img1 img2 [...imgN]\n\n" "Flags:\n" " --try_use_gpu (yes|no)\n" " Try to use GPU. The default value is 'no'. All default values\n" " are for CPU mode.\n" " --output \n" " The default is 'result.jpg'.\n"; } int parseCmdArgs(int argc, char** argv) { if (argc == 1) { printUsage(); return -1; } for (int i = 1; i < argc; ++i) { if (string(argv[i]) == "--help" || string(argv[i]) == "/?") { printUsage(); return -1; } else if (string(argv[i]) == "--try_use_gpu") { if (string(argv[i + 1]) == "no") try_use_gpu = false; else if (string(argv[i + 1]) == "yes") try_use_gpu = true; else { cout << "Bad --try_use_gpu flag value\n"; return -1; } i++; } else if (string(argv[i]) == "--output") { result_name = argv[i + 1]; i++; } else { Mat img = imread(argv[i]); if (img.empty()) { cout << "Can't read image '" << argv[i] << "'\n"; return -1; } imgs.push_back(img); } } return 0; } ================================================ FILE: toolkits/computer_vision/stitching_detailed.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include #include #include #include "opencv2/opencv_modules.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/stitching/detail/autocalib.hpp" #include "opencv2/stitching/detail/blenders.hpp" #include "opencv2/stitching/detail/camera.hpp" #include "opencv2/stitching/detail/exposure_compensate.hpp" #include "opencv2/stitching/detail/matchers.hpp" #include "opencv2/stitching/detail/motion_estimators.hpp" #include "opencv2/stitching/detail/seam_finders.hpp" #include "opencv2/stitching/detail/util.hpp" #include "opencv2/stitching/detail/warpers.hpp" #include "opencv2/stitching/warpers.hpp" using namespace std; using namespace cv; using namespace cv::detail; void printUsage() { cout << "Rotation model images stitcher.\n\n" "stitching_detailed img1 img2 [...imgN] [flags]\n\n" "Flags:\n" " --preview\n" " Run stitching in the preview mode. Works faster than usual mode,\n" " but output image will have lower resolution.\n" " --try_gpu (yes|no)\n" " Try to use GPU. The default value is 'no'. All default values\n" " are for CPU mode.\n" "\nMotion Estimation Flags:\n" " --work_megapix \n" " Resolution for image registration step. The default is 0.6 Mpx.\n" " --features (surf|orb)\n" " Type of features used for images matching. The default is surf.\n" " --match_conf \n" " Confidence for feature matching step. The default is 0.65 for surf and 0.3 for orb.\n" " --conf_thresh \n" " Threshold for two images are from the same panorama confidence.\n" " The default is 1.0.\n" " --ba (reproj|ray)\n" " Bundle adjustment cost function. 
The default is ray.\n" " --ba_refine_mask (mask)\n" " Set refinement mask for bundle adjustment. It looks like 'x_xxx',\n" " where 'x' means refine respective parameter and '_' means don't\n" " refine one, and has the following format:\n" " . The default mask is 'xxxxx'. If bundle\n" " adjustment doesn't support estimation of selected parameter then\n" " the respective flag is ignored.\n" " --wave_correct (no|horiz|vert)\n" " Perform wave effect correction. The default is 'horiz'.\n" " --save_graph \n" " Save matches graph represented in DOT language to file.\n" " Labels description: Nm is number of matches, Ni is number of inliers,\n" " C is confidence.\n" "\nCompositing Flags:\n" " --warp (plane|cylindrical|spherical|fisheye|stereographic|compressedPlaneA2B1|compressedPlaneA1.5B1|compressedPlanePortraitA2B1|compressedPlanePortraitA1.5B1|paniniA2B1|paniniA1.5B1|paniniPortraitA2B1|paniniPortraitA1.5B1|mercator|transverseMercator)\n" " Warp surface type. The default is 'spherical'.\n" " --seam_megapix \n" " Resolution for seam estimation step. The default is 0.1 Mpx.\n" " --seam (no|voronoi|gc_color|gc_colorgrad)\n" " Seam estimation method. The default is 'gc_color'.\n" " --compose_megapix \n" " Resolution for compositing step. Use -1 for original resolution.\n" " The default is -1.\n" " --expos_comp (no|gain|gain_blocks)\n" " Exposure compensation method. The default is 'gain_blocks'.\n" " --blend (no|feather|multiband)\n" " Blending method. The default is 'multiband'.\n" " --blend_strength \n" " Blending strength from [0,100] range. 
The default is 5.\n" " --output \n" " The default is 'result.jpg'.\n"; } // Default command line args vector img_names; bool preview = false; bool try_gpu = false; double work_megapix = 0.6; double seam_megapix = 0.1; double compose_megapix = -1; float conf_thresh = 1.f; string features = "surf"; string ba_cost_func = "ray"; string ba_refine_mask = "xxxxx"; bool do_wave_correct = true; WaveCorrectKind wave_correct = detail::WAVE_CORRECT_HORIZ; bool save_graph = false; std::string save_graph_to; string warp_type = "spherical"; int expos_comp_type = ExposureCompensator::GAIN_BLOCKS; float match_conf = 0.3f; string seam_find_type = "gc_color"; int blend_type = Blender::MULTI_BAND; float blend_strength = 5; string result_name = "result.jpg"; int parseCmdArgs(int argc, char** argv) { if (argc == 1) { printUsage(); return -1; } for (int i = 1; i < argc; ++i) { if (string(argv[i]) == "--help" || string(argv[i]) == "/?") { printUsage(); return -1; } else if (string(argv[i]) == "--preview") { preview = true; } else if (string(argv[i]) == "--try_gpu") { if (string(argv[i + 1]) == "no") try_gpu = false; else if (string(argv[i + 1]) == "yes") try_gpu = true; else { cout << "Bad --try_gpu flag value\n"; return -1; } i++; } else if (string(argv[i]) == "--work_megapix") { work_megapix = atof(argv[i + 1]); i++; } else if (string(argv[i]) == "--seam_megapix") { seam_megapix = atof(argv[i + 1]); i++; } else if (string(argv[i]) == "--compose_megapix") { compose_megapix = atof(argv[i + 1]); i++; } else if (string(argv[i]) == "--result") { result_name = argv[i + 1]; i++; } else if (string(argv[i]) == "--features") { features = argv[i + 1]; if (features == "orb") match_conf = 0.3f; i++; } else if (string(argv[i]) == "--match_conf") { match_conf = static_cast(atof(argv[i + 1])); i++; } else if (string(argv[i]) == "--conf_thresh") { conf_thresh = static_cast(atof(argv[i + 1])); i++; } else if (string(argv[i]) == "--ba") { ba_cost_func = argv[i + 1]; i++; } else if (string(argv[i]) == 
"--ba_refine_mask") { ba_refine_mask = argv[i + 1]; if (ba_refine_mask.size() != 5) { cout << "Incorrect refinement mask length.\n"; return -1; } i++; } else if (string(argv[i]) == "--wave_correct") { if (string(argv[i + 1]) == "no") do_wave_correct = false; else if (string(argv[i + 1]) == "horiz") { do_wave_correct = true; wave_correct = detail::WAVE_CORRECT_HORIZ; } else if (string(argv[i + 1]) == "vert") { do_wave_correct = true; wave_correct = detail::WAVE_CORRECT_VERT; } else { cout << "Bad --wave_correct flag value\n"; return -1; } i++; } else if (string(argv[i]) == "--save_graph") { save_graph = true; save_graph_to = argv[i + 1]; i++; } else if (string(argv[i]) == "--warp") { warp_type = string(argv[i + 1]); i++; } else if (string(argv[i]) == "--expos_comp") { if (string(argv[i + 1]) == "no") expos_comp_type = ExposureCompensator::NO; else if (string(argv[i + 1]) == "gain") expos_comp_type = ExposureCompensator::GAIN; else if (string(argv[i + 1]) == "gain_blocks") expos_comp_type = ExposureCompensator::GAIN_BLOCKS; else { cout << "Bad exposure compensation method\n"; return -1; } i++; } else if (string(argv[i]) == "--seam") { if (string(argv[i + 1]) == "no" || string(argv[i + 1]) == "voronoi" || string(argv[i + 1]) == "gc_color" || string(argv[i + 1]) == "gc_colorgrad") seam_find_type = argv[i + 1]; else { cout << "Bad seam finding method\n"; return -1; } i++; } else if (string(argv[i]) == "--blend") { if (string(argv[i + 1]) == "no") blend_type = Blender::NO; else if (string(argv[i + 1]) == "feather") blend_type = Blender::FEATHER; else if (string(argv[i + 1]) == "multiband") blend_type = Blender::MULTI_BAND; else { cout << "Bad blending method\n"; return -1; } i++; } else if (string(argv[i]) == "--blend_strength") { blend_strength = static_cast(atof(argv[i + 1])); i++; } else if (string(argv[i]) == "--output") { result_name = argv[i + 1]; i++; } else img_names.push_back(argv[i]); } if (preview) { compose_megapix = 0.6; } return 0; } int main(int argc, 
char* argv[]) { int64 app_start_time = getTickCount(); cv::setBreakOnError(true); int retval = parseCmdArgs(argc, argv); if (retval) return retval; // Check if have enough images int num_images = static_cast(img_names.size()); if (num_images < 2) { LOGLN("Need more images"); return -1; } double work_scale = 1, seam_scale = 1, compose_scale = 1; bool is_work_scale_set = false, is_seam_scale_set = false, is_compose_scale_set = false; LOGLN("Finding features..."); int64 t = getTickCount(); Ptr finder; if (features == "surf") { #ifdef HAVE_OPENCV_GPU if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0) finder = new SurfFeaturesFinderGpu(); else #endif finder = new SurfFeaturesFinder(); } else if (features == "orb") { finder = new OrbFeaturesFinder(); } else { cout << "Unknown 2D features type: '" << features << "'.\n"; return -1; } Mat full_img, img; vector features(num_images); vector images(num_images); vector full_img_sizes(num_images); double seam_work_aspect = 1; for (int i = 0; i < num_images; ++i) { full_img = imread(img_names[i]); full_img_sizes[i] = full_img.size(); if (full_img.empty()) { LOGLN("Can't open image " << img_names[i]); return -1; } if (work_megapix < 0) { img = full_img; work_scale = 1; is_work_scale_set = true; } else { if (!is_work_scale_set) { work_scale = min(1.0, sqrt(work_megapix * 1e6 / full_img.size().area())); is_work_scale_set = true; } resize(full_img, img, Size(), work_scale, work_scale); } if (!is_seam_scale_set) { seam_scale = min(1.0, sqrt(seam_megapix * 1e6 / full_img.size().area())); seam_work_aspect = seam_scale / work_scale; is_seam_scale_set = true; } (*finder)(img, features[i]); features[i].img_idx = i; LOGLN("Features in image #" << i << ": " << features[i].keypoints.size()); resize(full_img, img, Size(), seam_scale, seam_scale); images[i] = img.clone(); } finder->collectGarbage(); full_img.release(); img.release(); LOGLN("Finding features, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); LOG("Pairwise 
matching"); t = getTickCount(); vector pairwise_matches; BestOf2NearestMatcher matcher(try_gpu, match_conf); matcher(features, pairwise_matches); matcher.collectGarbage(); LOGLN("Pairwise matching, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); LOGLN("pairwise_matches.size() = " << pairwise_matches.size() << "\n"); for (size_t i=0; i!=pairwise_matches.size(); ++i) { LOGLN("src_img_idx = " << pairwise_matches[i].src_img_idx << "\t"); LOGLN("dst_img_idx = " << pairwise_matches[i].dst_img_idx << "\t"); LOGLN("matches.size() = " << pairwise_matches[i].matches.size() << "\t"); LOGLN("num_inliers = " << pairwise_matches[i].num_inliers << "\t"); LOGLN("confidence = " << pairwise_matches[i].confidence << "\n"); } // Check if we should save matches graph if (save_graph) { LOGLN("Saving matches graph..."); ofstream f(save_graph_to.c_str()); f << matchesGraphAsString(img_names, pairwise_matches, conf_thresh); // for (int i=0; i!=pairwise_matches.size(); ++i) // f << pairwise_matches[i].src_img_idx << " " << pairwise_matches[i].dst_img_idx << "\n"; } // Leave only images we are sure are from the same panorama vector indices = leaveBiggestComponent(features, pairwise_matches, conf_thresh); vector img_subset; vector img_names_subset; vector full_img_sizes_subset; for (size_t i = 0; i < indices.size(); ++i) { img_names_subset.push_back(img_names[indices[i]]); img_subset.push_back(images[indices[i]]); full_img_sizes_subset.push_back(full_img_sizes[indices[i]]); } images = img_subset; img_names = img_names_subset; full_img_sizes = full_img_sizes_subset; // Check if we still have enough images num_images = static_cast(img_names.size()); if (num_images < 2) { LOGLN("Need more images"); return -1; } LOG("Homography-based init\n"); t = getTickCount(); HomographyBasedEstimator estimator; vector cameras; estimator(features, pairwise_matches, cameras); LOGLN("Homography-based init, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); for (size_t i = 0; i 
< cameras.size(); ++i) { Mat R; cameras[i].R.convertTo(R, CV_32F); cameras[i].R = R; LOGLN("Initial intrinsics #" << indices[i]+1 << ":\n" << cameras[i].K()); } LOG("Bundle Adjustment\n"); t = getTickCount(); Ptr adjuster; if (ba_cost_func == "reproj") adjuster = new detail::BundleAdjusterReproj(); else if (ba_cost_func == "ray") adjuster = new detail::BundleAdjusterRay(); else { cout << "Unknown bundle adjustment cost function: '" << ba_cost_func << "'.\n"; return -1; } adjuster->setConfThresh(conf_thresh); Mat_ refine_mask = Mat::zeros(3, 3, CV_8U); if (ba_refine_mask[0] == 'x') refine_mask(0,0) = 1; if (ba_refine_mask[1] == 'x') refine_mask(0,1) = 1; if (ba_refine_mask[2] == 'x') refine_mask(0,2) = 1; if (ba_refine_mask[3] == 'x') refine_mask(1,1) = 1; if (ba_refine_mask[4] == 'x') refine_mask(1,2) = 1; adjuster->setRefinementMask(refine_mask); (*adjuster)(features, pairwise_matches, cameras); LOGLN("Bundle Adjustment, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); // Find median focal length vector focals; for (size_t i = 0; i < cameras.size(); ++i) { LOGLN("Camera #" << indices[i]+1 << ":\n" << cameras[i].K()); focals.push_back(cameras[i].focal); } sort(focals.begin(), focals.end()); //LOGLN("Focals size: " << focals.size() << "\n"); //LOGLN(" focals: " << focals[0] << "\t" << focals[1] << "\t" << focals[2] << "\n"); //LOGLN("Focals size: " << focals.size() << "\n"); float warped_image_scale; if (focals.size() % 2 == 1) warped_image_scale = static_cast(focals[focals.size() / 2]); else warped_image_scale = static_cast(focals[focals.size() / 2 - 1] + focals[focals.size() / 2]) * 0.5f; if (do_wave_correct) { vector rmats; for (size_t i = 0; i < cameras.size(); ++i) rmats.push_back(cameras[i].R); waveCorrect(rmats, wave_correct); for (size_t i = 0; i < cameras.size(); ++i) cameras[i].R = rmats[i]; } LOGLN("Warping images (auxiliary)... 
"); t = getTickCount(); vector corners(num_images); vector masks_warped(num_images); vector images_warped(num_images); vector sizes(num_images); vector masks(num_images); // Preapre images masks for (int i = 0; i < num_images; ++i) { masks[i].create(images[i].size(), CV_8U); masks[i].setTo(Scalar::all(255)); } // Warp images and their masks Ptr warper_creator; #ifdef HAVE_OPENCV_GPU if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0) { if (warp_type == "plane") warper_creator = new cv::PlaneWarperGpu(); else if (warp_type == "cylindrical") warper_creator = new cv::CylindricalWarperGpu(); else if (warp_type == "spherical") warper_creator = new cv::SphericalWarperGpu(); } else #endif { if (warp_type == "plane") warper_creator = new cv::PlaneWarper(); else if (warp_type == "cylindrical") warper_creator = new cv::CylindricalWarper(); else if (warp_type == "spherical") warper_creator = new cv::SphericalWarper(); else if (warp_type == "fisheye") warper_creator = new cv::FisheyeWarper(); else if (warp_type == "stereographic") warper_creator = new cv::StereographicWarper(); else if (warp_type == "compressedPlaneA2B1") warper_creator = new cv::CompressedRectilinearWarper(2, 1); else if (warp_type == "compressedPlaneA1.5B1") warper_creator = new cv::CompressedRectilinearWarper(1.5, 1); else if (warp_type == "compressedPlanePortraitA2B1") warper_creator = new cv::CompressedRectilinearPortraitWarper(2, 1); else if (warp_type == "compressedPlanePortraitA1.5B1") warper_creator = new cv::CompressedRectilinearPortraitWarper(1.5, 1); else if (warp_type == "paniniA2B1") warper_creator = new cv::PaniniWarper(2, 1); else if (warp_type == "paniniA1.5B1") warper_creator = new cv::PaniniWarper(1.5, 1); else if (warp_type == "paniniPortraitA2B1") warper_creator = new cv::PaniniPortraitWarper(2, 1); else if (warp_type == "paniniPortraitA1.5B1") warper_creator = new cv::PaniniPortraitWarper(1.5, 1); else if (warp_type == "mercator") warper_creator = new cv::MercatorWarper(); else if 
(warp_type == "transverseMercator") warper_creator = new cv::TransverseMercatorWarper(); } if (warper_creator.empty()) { cout << "Can't create the following warper '" << warp_type << "'\n"; return 1; } Ptr warper = warper_creator->create(static_cast(warped_image_scale * seam_work_aspect)); for (int i = 0; i < num_images; ++i) { Mat_ K; cameras[i].K().convertTo(K, CV_32F); float swa = (float)seam_work_aspect; K(0,0) *= swa; K(0,2) *= swa; K(1,1) *= swa; K(1,2) *= swa; corners[i] = warper->warp(images[i], K, cameras[i].R, INTER_LINEAR, BORDER_REFLECT, images_warped[i]); //cout << "corners x : " << corners[i].x << " y : " << corners[i].y << endl; sizes[i] = images_warped[i].size(); //cout << "sizes height : " << sizes[i].height << " width : " << sizes[i].width << endl; warper->warp(masks[i], K, cameras[i].R, INTER_NEAREST, BORDER_CONSTANT, masks_warped[i]); } vector images_warped_f(num_images); for (int i = 0; i < num_images; ++i) images_warped[i].convertTo(images_warped_f[i], CV_32F); LOGLN("Warping images, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); Ptr compensator = ExposureCompensator::createDefault(expos_comp_type); compensator->feed(corners, images_warped, masks_warped); Ptr seam_finder; if (seam_find_type == "no") seam_finder = new detail::NoSeamFinder(); else if (seam_find_type == "voronoi") seam_finder = new detail::VoronoiSeamFinder(); else if (seam_find_type == "gc_color") { #ifdef HAVE_OPENCV_GPU if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0) seam_finder = new detail::GraphCutSeamFinderGpu(GraphCutSeamFinderBase::COST_COLOR); else #endif seam_finder = new detail::GraphCutSeamFinder(GraphCutSeamFinderBase::COST_COLOR); } else if (seam_find_type == "gc_colorgrad") { #ifdef HAVE_OPENCV_GPU if (try_gpu && gpu::getCudaEnabledDeviceCount() > 0) seam_finder = new detail::GraphCutSeamFinderGpu(GraphCutSeamFinderBase::COST_COLOR_GRAD); else #endif seam_finder = new detail::GraphCutSeamFinder(GraphCutSeamFinderBase::COST_COLOR_GRAD); } 
if (seam_finder.empty()) { cout << "Can't create the following seam finder '" << seam_find_type << "'\n"; return 1; } seam_finder->find(images_warped_f, corners, masks_warped); // Release unused memory images.clear(); images_warped.clear(); images_warped_f.clear(); masks.clear(); LOGLN("Compositing..."); t = getTickCount(); Mat img_warped, img_warped_s; Mat dilated_mask, seam_mask, mask, mask_warped; Ptr blender; double compose_seam_aspect = 1; double compose_work_aspect = 1; for (int img_idx = 0; img_idx < num_images; ++img_idx) { LOGLN("Compositing image #" << indices[img_idx]+1); // Read image and resize it if necessary full_img = imread(img_names[img_idx]); if (!is_compose_scale_set) { if (compose_megapix > 0) compose_scale = min(1.0, sqrt(compose_megapix * 1e6 / full_img.size().area())); is_compose_scale_set = true; // Compute relative scales compose_seam_aspect = compose_scale / seam_scale; compose_work_aspect = compose_scale / work_scale; // Update warped image scale warped_image_scale *= static_cast(compose_work_aspect); warper = warper_creator->create(warped_image_scale); // Update corners and sizes for (int i = 0; i < num_images; ++i) { // Update intrinsics cameras[i].focal *= compose_work_aspect; cameras[i].ppx *= compose_work_aspect; cameras[i].ppy *= compose_work_aspect; // Update corner and size Size sz = full_img_sizes[i]; if (std::abs(compose_scale - 1) > 1e-1) { sz.width = cvRound(full_img_sizes[i].width * compose_scale); sz.height = cvRound(full_img_sizes[i].height * compose_scale); } Mat K; cameras[i].K().convertTo(K, CV_32F); Rect roi = warper->warpRoi(sz, K, cameras[i].R); corners[i] = roi.tl(); //cout << "corner x : " << corners[i].x << " y : " << corners[i].y << endl; sizes[i] = roi.size(); //cout << "size height : " << sizes[i].height << " width : " << sizes[i].width << endl; } } if (abs(compose_scale - 1) > 1e-1) resize(full_img, img, Size(), compose_scale, compose_scale); else img = full_img; full_img.release(); Size img_size = img.size(); 
Mat K; cameras[img_idx].K().convertTo(K, CV_32F); // Warp the current image warper->warp(img, K, cameras[img_idx].R, INTER_LINEAR, BORDER_REFLECT, img_warped); // Warp the current image mask mask.create(img_size, CV_8U); mask.setTo(Scalar::all(255)); warper->warp(mask, K, cameras[img_idx].R, INTER_NEAREST, BORDER_CONSTANT, mask_warped); // Compensate exposure compensator->apply(img_idx, corners[img_idx], img_warped, mask_warped); img_warped.convertTo(img_warped_s, CV_16S); img_warped.release(); img.release(); mask.release(); dilate(masks_warped[img_idx], dilated_mask, Mat()); resize(dilated_mask, seam_mask, mask_warped.size()); mask_warped = seam_mask & mask_warped; if (blender.empty()) { blender = Blender::createDefault(blend_type, try_gpu); Size dst_sz = resultRoi(corners, sizes).size(); float blend_width = sqrt(static_cast(dst_sz.area())) * blend_strength / 100.f; if (blend_width < 1.f) blender = Blender::createDefault(Blender::NO, try_gpu); else if (blend_type == Blender::MULTI_BAND) { MultiBandBlender* mb = dynamic_cast(static_cast(blender)); mb->setNumBands(static_cast(ceil(log(blend_width)/log(2.)) - 1.)); LOGLN("Multi-band blender, number of bands: " << mb->numBands()); } else if (blend_type == Blender::FEATHER) { FeatherBlender* fb = dynamic_cast(static_cast(blender)); fb->setSharpness(1.f/blend_width); LOGLN("Feather blender, sharpness: " << fb->sharpness()); } blender->prepare(corners, sizes); } // Blend the current image blender->feed(img_warped_s, mask_warped, corners[img_idx]); } Mat result, result_mask; blender->blend(result, result_mask); LOGLN("Compositing, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); imwrite(result_name, result); LOGLN("Finished, total time: " << ((getTickCount() - app_start_time) / getTickFrequency()) << " sec"); return 0; } ================================================ FILE: toolkits/computer_vision/utils.hpp ================================================ // utils.hpp - miscellaneous utilities // 
Originally from Nicol N. Schraudolph's isinf package // Later expanded by Dhruv Batra #ifndef UTILS_HPP #define UTILS_HPP #include #include #include #include #include #include #include #include #include #include // row-major array access #define ARR_RM(arr, r_ind, c_ind, ncols) (*(arr + r_ind*ncols + c_ind)) // col-major array access #define ARR_CM(arr, r_ind, c_ind, nrows) (*(arr + c_ind*nrows + r_ind)) // row-major ind2sub #define IND2SUB_RM(ind,r,c,ncols) \ r = floor(ind/ncols); \ c = ind % ncols; // column-major ind2sub #define IND2SUB_CM(ind,r,c,nrows) \ c = floor(ind/nrows); \ r = ind % nrows; // row-major sub2ind #define SUB2IND_RM(r,c,ncols) r*ncols + c // col-major sub2ind #define SUB2IND_CM(r,c,nrows) c*nrows + r // operators & formatted I/O for vectors // inner product template inline T operator*(const std::vector& a, const std::vector& b) { assert(a.size() == b.size()); T sum(0); for (size_t i = 0; i < a.size(); ++i) sum += a[i]*b[i]; return sum; } // element-wise sum template inline std::vector& operator+(const std::vector& a, const std::vector& b) { assert(a.size() == b.size()); std::vector sum(a.size()); for (size_t i = 0; i < a.size(); ++i) sum[i] = a[i]+b[i]; return sum; } template inline std::vector& operator+=(std::vector& a, const T& b) { typename std::vector::iterator i(a.begin()); while (i != a.end()) *i++ += b; return a; } template inline std::vector& operator-=(std::vector& a, const T& b) { typename std::vector::iterator i(a.begin()); while (i != a.end()) *i++ -= b; return a; } template inline std::vector& operator*=(std::vector& a, const T& b) { typename std::vector::iterator i(a.begin()); while (i != a.end()) *i++ *= b; return a; } template inline std::vector& operator/=(std::vector& a, const T& b) { typename std::vector::iterator i(a.begin()); while (i != a.end()) *i++ /= b; return a; } template inline std::vector& operator+=(std::vector& a, const std::vector& b) { assert(a.size() == b.size()); typename std::vector::iterator i(a.begin()); 
typename std::vector::const_iterator j(b.begin()); while (i != a.end()) *i++ += *j++; return a; } template inline std::vector& operator-=(std::vector& a, const std::vector& b) { assert(a.size() == b.size()); typename std::vector::iterator i(a.begin()); typename std::vector::const_iterator j(b.begin()); while (i != a.end()) *i++ -= *j++; return a; } template inline std::vector& operator*=(std::vector& a, const std::vector& b) { assert(a.size() == b.size()); typename std::vector::iterator i(a.begin()); typename std::vector::const_iterator j(b.begin()); while (i != a.end()) *i++ *= *j++; return a; } template inline std::vector& operator/=(std::vector& a, const std::vector& b) { assert(a.size() == b.size()); typename std::vector::iterator i(a.begin()); typename std::vector::const_iterator j(b.begin()); while (i != a.end()) *i++ /= *j++; return a; } template inline std::ostream& operator<<(std::ostream& os, const std::vector& x) { typename std::vector::const_iterator i(x.begin()); while(i != x.end()) os << *i++ << ' '; return os; } template inline std::istream& operator>>(std::istream& is, std::vector& x) { std::string s; const size_t n = x.size(); while (x.size() == n) { getline(is, s); if (is.fail()) break; std::istringstream iss(s); T item; iss >> item; while (iss.good()) { x.push_back(item); iss >> item; } if (!iss.fail()) x.push_back(item); } return is; } // Function to write a vector to file (Assumes << is defined for type T) // CHECK_NULL is provided by Danny Tarlow's Nymph Utils template void WriteToFile(std::string fname, std::vector vecx) { std::ofstream fout; fout.open(fname.c_str()); //CHECK_NULL(fout.fail(),"Could not open file for writing results\n"); fout << vecx; fout.close(); } #endif ================================================ FILE: toolkits/extensions/CMakeLists.txt ================================================ project(Extensions) if(CPP11) add_library(graphlab_extension STATIC MurmurHash3.cpp extension.cpp extension_graph.cpp 
extension_pagerank.cpp) add_dependencies(graphlab_extension graphlab) target_link_libraries(graphlab_extension graphlab) add_extension_executable(pagerank_extension_driver pagerank_extension_driver.cpp) endif() ================================================ FILE: toolkits/extensions/MurmurHash3.cpp ================================================ //----------------------------------------------------------------------------- // MurmurHash3 was written by Austin Appleby, and is placed in the public // domain. The author hereby disclaims copyright to this source code. // Note - The x86 and x64 versions do _not_ produce the same results, as the // algorithms are optimized for their respective platforms. You can still // compile and run any of them on any platform, but your performance with the // non-native version will be less than optimal. #include "MurmurHash3.h" //----------------------------------------------------------------------------- // Platform-specific functions and macros // Microsoft Visual Studio #if defined(_MSC_VER) #define FORCE_INLINE __forceinline #include #define ROTL32(x,y) _rotl(x,y) #define ROTL64(x,y) _rotl64(x,y) #define BIG_CONSTANT(x) (x) // Other compilers #else // defined(_MSC_VER) #define FORCE_INLINE __attribute__((always_inline)) inline uint32_t rotl32 ( uint32_t x, int8_t r ) { return (x << r) | (x >> (32 - r)); } inline uint64_t rotl64 ( uint64_t x, int8_t r ) { return (x << r) | (x >> (64 - r)); } #define ROTL32(x,y) rotl32(x,y) #define ROTL64(x,y) rotl64(x,y) #define BIG_CONSTANT(x) (x##LLU) #endif // !defined(_MSC_VER) //----------------------------------------------------------------------------- // Block read - if your platform needs to do endian-swapping or can only // handle aligned reads, do the conversion here FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i ) { return p[i]; } FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i ) { return p[i]; } 
//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

// 32-bit finalizer: xor-shift / multiply sequence that diffuses every input
// bit across the whole word before the hash is emitted.
FORCE_INLINE uint32_t fmix ( uint32_t h )
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

//----------

// 64-bit finalizer, same avalanche role as the 32-bit fmix above.
FORCE_INLINE uint64_t fmix ( uint64_t k )
{
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  k ^= k >> 33;

  return k;
}

//-----------------------------------------------------------------------------

// Hashes `len` bytes at `key` with `seed`; writes the 32-bit result through
// `out` (which must point to at least 4 writable bytes).
// Reads 4-byte blocks via getblock — NOTE(review): this assumes the platform
// tolerates (or getblock is adapted for) unaligned loads; confirm for the
// target architecture.
void MurmurHash3_x86_32 ( const void * key, int len,
                          uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 4;

  uint32_t h1 = seed;

  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  //----------
  // body

  // `blocks` points one-past-the-end of the full 4-byte blocks; the loop
  // below indexes it with negative i so it walks the blocks front to back.
  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);

  for(int i = -nblocks; i; i++)
  {
    uint32_t k1 = getblock(blocks,i);

    k1 *= c1;
    k1 = ROTL32(k1,15);
    k1 *= c2;

    h1 ^= k1;
    h1 = ROTL32(h1,13);
    h1 = h1*5+0xe6546b64;
  }

  //----------
  // tail

  // Mix in the 0-3 bytes that did not fill a whole block.
  // The case fallthrough is intentional: each case accumulates one byte.
  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);

  uint32_t k1 = 0;

  switch(len & 3)
  {
  case 3: k1 ^= tail[2] << 16;
  case 2: k1 ^= tail[1] << 8;
  case 1: k1 ^= tail[0];
          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
  };

  //----------
  // finalization

  // Fold in the length so inputs of different sizes with identical blocks
  // still hash differently, then avalanche.
  h1 ^= len;

  h1 = fmix(h1);

  *(uint32_t*)out = h1;
}

//-----------------------------------------------------------------------------

// 128-bit hash tuned for 32-bit platforms: four interleaved 32-bit lanes.
// Writes 16 bytes (four uint32_t) through `out`.
void MurmurHash3_x86_128 ( const void * key, const int len,
                           uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;

  // All four lanes start from the same seed.
  uint32_t h1 = seed;
  uint32_t h2 = seed;
  uint32_t h3 = seed;
  uint32_t h4 = seed;

  const uint32_t c1 = 0x239b961b;
  const uint32_t c2 = 0xab0e9789;
  const uint32_t c3 = 0x38b34ae5;
  const uint32_t c4 = 0xa1e38b93;

  //----------
  // body

  // Same negative-index trick as the 32-bit variant, over 16-byte blocks.
  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);

  for(int i = -nblocks; i; i++)
  {
    uint32_t k1 = getblock(blocks,i*4+0);
    uint32_t k2 = getblock(blocks,i*4+1);
    uint32_t k3 = getblock(blocks,i*4+2);
    uint32_t k4 = getblock(blocks,i*4+3);

    // Each lane mixes its own block word, then borrows from the next lane
    // so the four lanes do not evolve independently.
    k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;

    k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;

    k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;

    k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
  }

  //----------
  // tail

  // Remaining 0-15 bytes; intentional fallthrough accumulates one byte per
  // case, and each lane's partial word is mixed once its bytes are gathered.
  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

  uint32_t k1 = 0;
  uint32_t k2 = 0;
  uint32_t k3 = 0;
  uint32_t k4 = 0;

  switch(len & 15)
  {
  case 15: k4 ^= tail[14] << 16;
  case 14: k4 ^= tail[13] << 8;
  case 13: k4 ^= tail[12] << 0;
           k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;

  case 12: k3 ^= tail[11] << 24;
  case 11: k3 ^= tail[10] << 16;
  case 10: k3 ^= tail[ 9] << 8;
  case  9: k3 ^= tail[ 8] << 0;
           k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;

  case  8: k2 ^= tail[ 7] << 24;
  case  7: k2 ^= tail[ 6] << 16;
  case  6: k2 ^= tail[ 5] << 8;
  case  5: k2 ^= tail[ 4] << 0;
           k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;

  case  4: k1 ^= tail[ 3] << 24;
  case  3: k1 ^= tail[ 2] << 16;
  case  2: k1 ^= tail[ 1] << 8;
  case  1: k1 ^= tail[ 0] << 0;
           k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
  };

  //----------
  // finalization

  // Mix length into every lane, cross-add the lanes, avalanche each, then
  // cross-add again so all 128 output bits depend on all input bits.
  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  h1 = fmix(h1);
  h2 = fmix(h2);
  h3 = fmix(h3);
  h4 = fmix(h4);

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  ((uint32_t*)out)[0] = h1;
  ((uint32_t*)out)[1] = h2;
  ((uint32_t*)out)[2] = h3;
  ((uint32_t*)out)[3] = h4;
}

//-----------------------------------------------------------------------------

// 128-bit hash tuned for 64-bit platforms: two 64-bit lanes, forward block
// iteration. Writes 16 bytes (two uint64_t) through `out`. By design this
// does NOT produce the same digest as MurmurHash3_x86_128.
void MurmurHash3_x64_128 ( const void * key, const int len,
                           const uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;

  uint64_t h1 = seed;
  uint64_t h2 = seed;

  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

  //----------
  // body

  const uint64_t * blocks = (const uint64_t *)(data);

  for(int i = 0; i < nblocks; i++)
  {
    uint64_t k1 = getblock(blocks,i*2+0);
    uint64_t k2 = getblock(blocks,i*2+1);

    k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;

    k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
  }

  //----------
  // tail

  // Remaining 0-15 bytes; intentional fallthrough, bytes widened to 64 bits
  // before shifting so high shifts are well-defined.
  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);

  uint64_t k1 = 0;
  uint64_t k2 = 0;

  switch(len & 15)
  {
  case 15: k2 ^= uint64_t(tail[14]) << 48;
  case 14: k2 ^= uint64_t(tail[13]) << 40;
  case 13: k2 ^= uint64_t(tail[12]) << 32;
  case 12: k2 ^= uint64_t(tail[11]) << 24;
  case 11: k2 ^= uint64_t(tail[10]) << 16;
  case 10: k2 ^= uint64_t(tail[ 9]) << 8;
  case  9: k2 ^= uint64_t(tail[ 8]) << 0;
           k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;

  case  8: k1 ^= uint64_t(tail[ 7]) << 56;
  case  7: k1 ^= uint64_t(tail[ 6]) << 48;
  case  6: k1 ^= uint64_t(tail[ 5]) << 40;
  case  5: k1 ^= uint64_t(tail[ 4]) << 32;
  case  4: k1 ^= uint64_t(tail[ 3]) << 24;
  case  3: k1 ^= uint64_t(tail[ 2]) << 16;
  case  2: k1 ^= uint64_t(tail[ 1]) << 8;
  case  1: k1 ^= uint64_t(tail[ 0]) << 0;
           k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
  };

  //----------
  // finalization

  h1 ^= len; h2 ^= len;

  h1 += h2;
  h2 += h1;

  h1 = fmix(h1);
  h2 = fmix(h2);

  h1 += h2;
  h2 += h1;

  ((uint64_t*)out)[0] = h1;
  ((uint64_t*)out)[1] = h2;
}

//-----------------------------------------------------------------------------

================================================
FILE: toolkits/extensions/MurmurHash3.h
================================================
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
#ifndef _MURMURHASH3_H_ #define _MURMURHASH3_H_ //----------------------------------------------------------------------------- // Platform-specific functions and macros // Microsoft Visual Studio #if defined(_MSC_VER) typedef unsigned char uint8_t; typedef unsigned long uint32_t; typedef unsigned __int64 uint64_t; // Other compilers #else // defined(_MSC_VER) #include #endif // !defined(_MSC_VER) //----------------------------------------------------------------------------- void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); //----------------------------------------------------------------------------- #endif // _MURMURHASH3_H_ ================================================ FILE: toolkits/extensions/example.txt ================================================ void pagerank(extension_graph& graph) { graph.transform_field("pr", [](var v){ return 0.15; }); graph.GAS( [](const vars&) { return graphlab::IN_EDGES; }, // gather_edges [](const vars&, vars&, const vars& other, edge_direction) { // gather return vget(other.field("pr")) / vget(other.field("out_degree")) ; }, [](var& a, const var& b) { // combine vget(a) += vget(b); }, [](vars& v, const var& result) -> bool { // apply double pr = 0.15 + 0.85 * vget(result); v.field("change") = std::fabs(pr - vget(v.field("pr"))) / vget(v.field("out_degree")); v.field("pr") = pr; return false; }, [](const vars& v) { // scatter_edges return vget(v.field("change")) > 0.01 ? 
graphlab::OUT_EDGES : graphlab::NO_EDGES; }, [](const vars&, const vars&, const vars&, edge_direction) {// scatter return true; } ); // scatter } ================================================ FILE: toolkits/extensions/extension.cpp ================================================ #include #include #include #include #include "extension_gas.hpp" namespace graphlab { namespace extension { var& operator+=(var& value, const var& other) { const double* other_double = boost::get(&other); const std::string* other_string = boost::get(&other); const Eigen::VectorXd* other_vector = boost::get(&other); const Eigen::MatrixXd* other_matrix = boost::get(&other); if ( double* val = boost::get( &value) ) { if (other_double!= NULL) value = (double)(*val) + (*other_double); else ASSERT_MSG(false, "Type mismatch in operator+="); } else if ( std::string* val = boost::get( &value) ) { if (other_string != NULL) (*val) += (*other_string); else ASSERT_MSG(false, "Type mismatch in operator+="); } else if ( Eigen::VectorXd* val = boost::get( &value) ) { if (other_vector!= NULL) (*val) += (*other_vector); else ASSERT_MSG(false, "Type mismatch in operator+="); } else if ( Eigen::MatrixXd* val = boost::get( &value) ) { if (other_matrix!= NULL) (*val) += (*other_matrix); else ASSERT_MSG(false, "Type mismatch in operator+="); } return value; } // lets get more than we will ever need so it will never need to resize std::vector descriptor_set(65536); lockfree_push_back > descriptor_access(descriptor_set, 0); var vars::empty_var; } } ================================================ FILE: toolkits/extensions/extension_data.hpp ================================================ #ifndef GRAPHLAB_EXTENSION_DATA_HPP #define GRAPHLAB_EXTENSION_DATA_HPP #include #include #include #include #include #include "../collaborative_filtering/eigen_serialization.hpp" #include "MurmurHash3.h" // Here I define the basic "var" variant type. 
// which is basically a boost::variant around a double, string, vector and matrix namespace graphlab { namespace extension { // the key here is that we are going to lock down the // type system for GraphLab to a wrapped boost::variant typedef boost::variant var; extern var& operator+=(var& value, const var& other); /** * Gets a typechecked value from the variant var */ template inline const T& get(const var& v) { const T* val = boost::get(&v); if (val == NULL) { logger_once(LOG_ERROR, "Reading invalid type from var"); static T t; return t; } return *val; } /** * Gets a typechecked value from the variant var */ template inline T& get(var& v) { T* val = boost::get(&v); if (val == NULL) { logger_once(LOG_ERROR, "Reading invalid type from var"); static T t; return t; } return *val; } ////////////////////////////////////////////////////////////// // Here we briefly escape out tp the global namespace // // to define serializers and deserializers for the var // ////////////////////////////////////////////////////////////// }} BEGIN_OUT_OF_PLACE_SAVE(oarc, graphlab::extension::var, value) { if ( const double* val = boost::get( &value) ) { oarc << char(1) << (*val); } else if ( const std::string* val = boost::get( &value) ) { oarc << char(2) << (*val); } else if ( const Eigen::VectorXd* val = boost::get( &value) ) { oarc << char(3) << (*val); } else if ( const Eigen::MatrixXd* val = boost::get( &value) ) { oarc << char(4) << (*val); } } END_OUT_OF_PLACE_SAVE() BEGIN_OUT_OF_PLACE_LOAD(iarc, graphlab::extension::var, value) { char content_type; iarc >> content_type; if (content_type == 1) { double val; iarc >> val; value = val; } else if (content_type == 2) { std::string val; iarc >> val; value = val; } else if (content_type == 3) { Eigen::VectorXd val; iarc >> val; value = val; } else if (content_type == 4) { Eigen::MatrixXd val; iarc >> val; value = val; } } END_OUT_OF_PLACE_LOAD() namespace graphlab { namespace extension { 
////////////////////////////////////////////////////////////// // Returning to your regular progamming // ////////////////////////////////////////////////////////////// typedef uint32_t key_id_type; /** vars is a dynamic struct with a mapping from string->var. * Internally, it is stored as key_id_type->var where the key is * a hash value of the string. * we assume that the murmurhash will never collide for the small * namespaces considered. */ inline key_id_type get_id_from_name(const char* key) { uint32_t ret = 0; MurmurHash3_x86_32((void*)key, strlen(key), 12345, (void*)(&ret)); return ret; } // overload for string inline key_id_type get_id_from_name(const std::string& key) { uint32_t ret = 0; MurmurHash3_x86_32((void*)key.c_str(), key.length(), 12345, (void*)(&ret)); return ret; } // overload for int inline key_id_type get_id_from_name(key_id_type key) { return key; } /** * A dynamic struct storing mappings from string->var where var * is a variant. * fields can be accessed with operator() or ".field()" */ struct vars { std::vector > table; static var empty_var; simple_spinlock lock; vars() { } ~vars() { } void clear() { lock.lock(); for(const std::pair& p: table) { delete p.second; } table.clear(); lock.unlock(); } vars& operator=(const vars& v) { clear(); for (size_t i = 0;i < v.table.size(); ++i) { field(v.table[i].first) = *(v.table[i].second); } return *this; } void save(oarchive& oarc) const { lock.lock(); oarc << (size_t)table.size(); for(const std::pair& p: table) { oarc << p.first << (*p.second); } lock.unlock(); } void load(iarchive& iarc) { size_t tsize; iarc >> tsize; for (size_t i = 0;i < tsize; ++i) { key_id_type key; iarc >> key; iarc >> field(key); } } var& operator()(const std::string& key) { return field(key); } const var& operator()(const std::string& key) const { return field(key); } var& operator()(const char* key) { return field(key); } const var& operator()(const char* key) const { return field(key); } var& operator()(key_id_type key) { 
return field(key); } const var& operator()(key_id_type key) const { return field(key); } var& field(const std::string& _key) { key_id_type key = get_id_from_name(_key); return field(key); } const var& field(const std::string& _key) const { key_id_type key = get_id_from_name(_key); return field(key); } var& field(const char* _key) { key_id_type key = get_id_from_name(_key); return field(key); } const var& field(const char* _key) const { key_id_type key = get_id_from_name(_key); return field(key); } var& field(key_id_type key) { lock.lock(); for(std::pair& p: table) { if (p.first == key) { lock.unlock(); return *(p.second); } } var* ret = new var; // force slow resize to limit memory usage // assume that field creation is not a common operation. table.reserve(table.size() + 1); table.push_back(std::make_pair(key, ret)); lock.unlock(); return *ret; } const var& field(key_id_type key) const { for(const std::pair& p: table) { if (p.first == key) { return *(p.second); } } return empty_var; } }; typedef distributed_graph internal_graph_type; } // namespace extension } // namespace graphlab #endif ================================================ FILE: toolkits/extensions/extension_gas.hpp ================================================ #ifndef GRAPHLAB_EXTENSION_GAS_HPP #define GRAPHLAB_EXTENSION_GAS_HPP #include #include #include "extension_data.hpp" #include "extension_gas_base_types.hpp" namespace graphlab { namespace extension{ /// A collection of all the user ops together struct gas_op_descriptor{ gather_functor* gather_op; gather_select_functor* gather_select_op; combiner_functor* combiner_op; apply_functor* apply_op; scatter_functor* scatter_op; scatter_select_functor* scatter_select_op; }; // the active set of GAS sets to run extern std::vector descriptor_set; extern lockfree_push_back > descriptor_access; typedef uint32_t descriptor_id_type; // A wrapper around the gather operation struct gather_var { var v; descriptor_id_type descriptor_id; combiner_functor* 
combiner_op; gather_var():descriptor_id(-1), combiner_op(NULL) { } var& operator+=(const var& other) { (*combiner_op)(v, other); return v; } var& operator+=(const gather_var& other) { (*combiner_op)(v, other.v); return v; } inline void save(graphlab::oarchive& oarc) const { oarc << v << descriptor_id; } inline void load(graphlab::iarchive& iarc) { iarc >> v >> descriptor_id; gas_op_descriptor gas; bool ret = descriptor_access.query_unsafe(descriptor_id, gas); if (ret) combiner_op = gas.combiner_op; } }; // a wrapper around the message struct message_type: public graphlab::IS_POD_TYPE { descriptor_id_type descriptor; message_type(): descriptor(-1){ } message_type(descriptor_id_type d): descriptor(d) { }; message_type& operator+=(const message_type& other) { return *this; } }; struct extension_update_functor: public graphlab::ivertex_program { public: typedef graphlab::ivertex_program parent_type; descriptor_id_type descriptor_id; // gas_op_descriptor gas; extension_update_functor():descriptor_id(-1) { } extension_update_functor(size_t id):descriptor_id(id) { } inline void init(icontext_type& context, const vertex_type& vertex, const message_type& msg) { descriptor_id = msg.descriptor; } edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { gas_op_descriptor* gas = descriptor_access.query_unsafe(descriptor_id); ASSERT_TRUE(gas != NULL); if (gas->gather_select_op) { return (*gas->gather_select_op)(vertex.data()); } else { return ALL_EDGES; } } inline gather_var gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { gas_op_descriptor* gas = descriptor_access.query_unsafe(descriptor_id); ASSERT_TRUE(gas != NULL); vertex_type other_vertex = edge.source().id() == vertex.id() ? edge.target() : edge.source(); gather_var ret; ret.v = (*gas->gather_op)(vertex.data(), edge.data(), other_vertex.data(), edge.source().id() == vertex.id() ? 
OUT_EDGE : IN_EDGE); ret.descriptor_id = descriptor_id; ret.combiner_op = gas->combiner_op; return ret; } inline void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { gas_op_descriptor* gas = descriptor_access.query_unsafe(descriptor_id); ASSERT_TRUE(gas != NULL); bool sched = (*gas->apply_op)(vertex.data(), total.v); if (sched) context.signal(vertex, descriptor_id); } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { gas_op_descriptor* gas = descriptor_access.query_unsafe(descriptor_id); ASSERT_TRUE(gas != NULL); if (gas->scatter_select_op) { return (*gas->scatter_select_op)(vertex.data()); } else { return ALL_EDGES; } } inline void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { gas_op_descriptor* gas = descriptor_access.query_unsafe(descriptor_id); ASSERT_TRUE(gas != NULL); vertex_type other_vertex = edge.source().id() == vertex.id() ? edge.target() : edge.source(); bool ret = (*gas->scatter_op)(vertex.data(), edge.data(), other_vertex.data(), edge.source().id() == vertex.id() ? 
OUT_EDGE : IN_EDGE); if (ret) { context.signal(other_vertex, descriptor_id); } } inline void save(graphlab::oarchive& oarc) const { oarc << descriptor_id; } inline void load(graphlab::iarchive& iarc) { iarc >> descriptor_id; } }; } // namespace extension } // namespace graphlab #endif ================================================ FILE: toolkits/extensions/extension_gas_base_types.hpp ================================================ #ifndef GRAPHLAB_EXTENSION_GAS_BASE_TYPES_HPP #define GRAPHLAB_EXTENSION_GAS_BASE_TYPES_HPP #include "extension_data.hpp" namespace graphlab { namespace extension { // edge direction enum enum edge_direction { IN_EDGE, OUT_EDGE }; ////////////////////////////////////////////////////////////// // Here we will produce base types of all the functors // ////////////////////////////////////////////////////////////// // the combiner base type struct combiner_functor { virtual void operator()(var& , const var&) = 0; }; // the gather base type struct gather_functor { virtual var operator()(const vars& center, vars& edge, const vars& other, edge_direction direction) = 0; }; //gather select base type struct gather_select_functor { virtual edge_dir_type operator()(const vars& center) { return ALL_EDGES; } }; // the apply base type struct apply_functor { // return true to schedule self virtual bool operator()(vars& center, const var& gather_result) = 0; }; // scatter select base type struct scatter_select_functor { virtual edge_dir_type operator()(const vars& center) { return ALL_EDGES; } }; // the scatter base type struct scatter_functor { // return true to schedule other virtual bool operator()(const vars& center, vars& edge, const vars& other, edge_direction direction) = 0; }; } // extension } // graphlab #endif ================================================ FILE: toolkits/extensions/extension_gas_lambda_wrapper.hpp ================================================ #ifndef GRAPHLAB_EXTENSION_GAS_LAMBDA_WRAPPER #define 
GRAPHLAB_EXTENSION_GAS_LAMBDA_WRAPPER #include "extension_gas.hpp" /* Implements a collection of lightweight generic wrappers around the GAS, transform and map operations. These wrappers do nothing but store a functor which is compatible with the base types in extension_gas_base_types.hpp, and call them. */ namespace graphlab { namespace extension { template struct transform_field_wrapper { Functor* f; key_id_type field; typedef void result_type; transform_field_wrapper(Functor* f, FieldType field): f(f),field(get_id_from_name(field)) { } void operator()(internal_graph_type::vertex_type& vtx) { var& v = vtx.data()(field); v = (*f)(v); } }; template struct map_field_wrapper { Functor* f; key_id_type field; typedef var result_type; map_field_wrapper(Functor* f, FieldType field): f(f),field(get_id_from_name(field)) { } var operator()(internal_graph_type::vertex_type& vtx) { var& v = vtx.data()(field); return (*f)(v); } }; // the combiner base type template struct generic_combiner: public combiner_functor { CombinerType* ct; void operator()(var& a, const var& b) { (*ct)(a, b); } }; template struct generic_gather_select : public gather_select_functor { GatherSelectType* gt; edge_dir_type operator()(const vars& center) { return (*gt)(center); } }; template struct generic_gather : public gather_functor { GatherType* gt; var operator()(const vars& center, vars& edge, const vars& other, edge_direction direction) { return (*gt)(center, edge, other, direction); } }; template struct generic_apply : public apply_functor { ApplyType* at; bool operator()(vars& center, const var& gather_result) { return (*at)(center, gather_result); } }; template struct generic_scatter_select : public scatter_select_functor { ScatterSelectType* st; edge_dir_type operator()(const vars& center) { return (*st)(center); } }; template struct generic_scatter : public scatter_functor { ScatterType* st; bool operator()(const vars& center, vars& edge, const vars& other, edge_direction direction) { return 
(*st)(center, edge, other, direction); } }; } // extension } // graphlab #endif ================================================ FILE: toolkits/extensions/extension_graph.cpp ================================================ #include #include "extension_graph.hpp" namespace graphlab { namespace extension { void extension_graph::synchronous_dispatch_new_engine(size_t desc_id) { synchronous_engine sync_engine(rmi.dc(), internal_graph, __glopts); sync_engine.signal_all(desc_id); sync_engine.start(); } } // extension } // graphlab ================================================ FILE: toolkits/extensions/extension_graph.hpp ================================================ #ifndef GRAPHLAB_EXTENSION_GRAPH_HPP #define GRAPHLAB_EXTENSION_GRAPH_HPP #include "extension_data.hpp" #include "extension_gas.hpp" #include "extension_gas_lambda_wrapper.hpp" #include "extension_main.hpp" namespace graphlab { namespace dc_impl { extern distributed_control* get_last_dc(); } namespace extension { struct extension_graph_writer{ std::string field; extension_graph_writer(std::string field):field(field) { } std::string save_vertex(internal_graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "\t" << v.data()(field) << "\n"; return strm.str(); } std::string save_edge(internal_graph_type::edge_type e) { std::stringstream strm; strm << e.source().id() << "\t" << e.target().id() << e.data()(field) << "\n"; return strm.str(); } }; class extension_graph { public: dc_dist_object rmi; internal_graph_type internal_graph; mutex lock; bool finalized; extension_graph() :rmi(*dc_impl::get_last_dc(), this), internal_graph(*dc_impl::get_last_dc(), __glopts),finalized(false) { } extension_graph(distributed_control& dc, const graphlab_options& opts = graphlab_options() ) :rmi(dc, this), internal_graph(dc, opts),finalized(false) { } template void transform_field(FieldType field, TransformType transform_functor) { finalize(); lock.lock(); transform_field_wrapper fw(&transform_functor, 
field); internal_graph.transform_vertices(fw); lock.unlock(); } /* template var map_reduce_field(FieldType field, MapFunctor map_functor) { finalize(); lock.lock(); map_field_wrapper mw(&map_functor, field); var ret = internal_graph.map_reduce_vertices(mw); lock.unlock(); return ret; } */ void load_structure(std::string prefix, std::string format) { lock.lock(); internal_graph.load_format(prefix, format); lock.unlock(); } void save_vertices(std::string prefix, std::string field) { internal_graph.save(prefix, extension_graph_writer(field), false, // do not gzip true, // save vertices false); // do not save edges } internal_graph_type& graph() { return internal_graph; } void finalize() { lock.lock(); if (!finalized) { internal_graph.finalize(); internal_graph.transform_vertices([] (internal_graph_type::vertex_type& v) { v.data().field("in_degree") = (double)v.num_in_edges(); v.data().field("out_degree") = (double)v.num_out_edges(); }); } lock.unlock(); } void synchronous_dispatch_new_engine(size_t desc_id); /// GAS which defaults to all out and all in edges template void GAS(GatherType gather, CombinerType combiner, ApplyType apply, ScatterType scatter, size_t iterations = 0) { finalize(); generic_gather g; g.gt = &gather; generic_combiner c; c.ct = &combiner; generic_apply a; a.at = &apply; generic_scatter s; s.st = &scatter; gas_op_descriptor gd; gd.gather_select_op = NULL; gd.gather_op = &g; gd.combiner_op = &c; gd.apply_op = &a; gd.scatter_select_op = NULL; gd.scatter_op = &s; lock.lock(); descriptor_id_type descid = descriptor_access.push_back(gd); lock.unlock(); synchronous_dispatch_new_engine(descid); } /// Regular GAS template void GAS(GatherSelectType gatherselect, GatherType gather, CombinerType combiner, ApplyType apply, ScatterSelectType scatterselect, ScatterType scatter, size_t iterations = 0) { finalize(); generic_gather_select gs; gs.gt = &gatherselect; generic_gather g; g.gt = &gather; generic_combiner c; c.ct = &combiner; generic_apply a; a.at = 
&apply; generic_scatter_select ss; ss.st = &scatterselect; generic_scatter s; s.st = &scatter; gas_op_descriptor gd; gd.gather_select_op = &gs; gd.gather_op = &g; gd.combiner_op = &c; gd.apply_op = &a; gd.scatter_select_op = &ss; gd.scatter_op = &s; lock.lock(); descriptor_id_type descid = descriptor_access.push_back(gd); lock.unlock(); synchronous_dispatch_new_engine(descid); } }; } // namespace extension } // namespace graphlab #endif ================================================ FILE: toolkits/extensions/extension_main.cpp ================================================ #include #include #include int __real_main(int argc, char** argv); graphlab::command_line_options __glopts(""); int actual_main(int argc, char** argv) { graphlab::mpi_tools::init(argc, argv); if (!__glopts.parse(argc, argv, true)) return false; // rebuild argc argv with unrecognized options std::vector vs = __glopts.unrecognized(); char** newargv = new char*[vs.size() + 1]; newargv[0] = argv[0]; for (size_t i = 0;i < vs.size(); ++ i) { newargv[i + 1] = (char*)(vs[i].c_str()); } int ret = __real_main(vs.size() + 1, newargv); if (graphlab::dc_impl::get_last_dc()) delete graphlab::dc_impl::get_last_dc(); graphlab::mpi_tools::finalize(); return ret; } #if 1 // don't seem to be able to get -wrap main working correctly int main(int argc, char** argv) { return actual_main(argc, argv); } #else int __wrap_main(int argc, char** argv) { return actual_main(argc, argv); } #endif ================================================ FILE: toolkits/extensions/extension_main.hpp ================================================ #ifndef GRAPHLAB_EXTENSION_MAIN_HPP #define GRAPHLAB_EXTENSION_MAIN_HPP #include extern graphlab::command_line_options __glopts; #endif ================================================ FILE: toolkits/extensions/extension_pagerank.cpp ================================================ #include "extensions.hpp" namespace graphlab { namespace extension { void pagerank(extension_graph& graph, 
const std::string PR_FIELD_NAME, double tolerance) { const std::string PR_CHANGE_NAME = PR_FIELD_NAME + "_change"; key_id_type PR_FIELD = get_id_from_name(PR_FIELD_NAME); key_id_type PR_CHANGE = get_id_from_name(PR_CHANGE_NAME); key_id_type OUT_DEG = get_id_from_name("out_degree"); graph.transform_field(PR_FIELD, [](var v){ return 0.15; }); timer ti; graph.GAS( [](const vars&) { return graphlab::IN_EDGES; }, // gather_edges [=](const vars&, vars&, const vars& other, edge_direction) { // gather return get(other(PR_FIELD)) / get(other(OUT_DEG)) ; }, [](var& a, const var& b) { // combine get(a) += get(b); }, [=](vars& v, const var& result) -> bool { // apply double pr = 0.15 + 0.85 * get(result); v(PR_CHANGE) = std::fabs(pr - get(v(PR_FIELD))) / get(v(OUT_DEG)); v(PR_FIELD) = pr; return false; }, [=](const vars& v) { // scatter_edges return get(v.field(PR_CHANGE)) > tolerance ? graphlab::OUT_EDGES : graphlab::NO_EDGES; }, [](const vars&, const vars&, const vars&, edge_direction) {// scatter return true; } ); // scatter std::cout << "PageRank complete in " << ti.current_time() << "s" << std::endl; } } // namespace extension } // namespace graphlab ================================================ FILE: toolkits/extensions/extensions.hpp ================================================ #include "extension_graph.hpp" #ifndef GRAPHLAB_EXTENSIONS_HPP #define GRAPHLAB_EXTENSIONS_HPP #if 1 // we have to use this unreliable hack // don't seem to be able to get -wrap main // working. 
TOFIX #define main __real_main #endif namespace graphlab { namespace extension { // prototype for all implemented extensions void pagerank(extension_graph& graph, const std::string PR_FIELD, double tolerance); } // namespace extension } // namespace graphlab #endif ================================================ FILE: toolkits/extensions/pagerank_extension_driver.cpp ================================================ #include "extensions.hpp" #include using namespace graphlab::extension; int main(int argc, char** argv) { extension_graph graph; if (argc < 2) { std::cout << argv[0] << " [input prefix] optional:[output prefix]\n"; return 0; } graph.load_structure(argv[1], "snap"); pagerank(graph, "pr", 0.01); if (argc > 2) { graph.save_vertices(argv[2], "pr"); } } ================================================ FILE: toolkits/graph_algorithms/CMakeLists.txt ================================================ project(Djikstra) add_graphlab_executable(djikstra djikstra.cpp) add_graphlab_executable(prestige prestige.cpp) add_graphlab_executable(betweeness betweeness.cpp) add_graphlab_executable(closeness closeness.cpp) ================================================ FILE: toolkits/graph_algorithms/betweeness.cpp ================================================ /* * Copyright (c) 2014 Daniel McEnnis. * portions of main Copyright (c) 2009 Carnegie Mellon * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #include #include #include /* * Djikstra Graph Node Class * * This class contains the information about a single graphlab node. * id - current best path's previous node id - next node on path to root * cost - current cost of the path to route by the current route: Note - this * can become inaccurate in the course of calculations and must be recalculated * by traversing the shortest path tree to get an accurate result. * launched - has execution of this node been sheduled * done - has execution of this node been completed */ class DjikstraNode { public: long id; double cost; bool launched; bool done; DjikstraNode(){ id = 0; cost = 1e100; launched = false; done=false; } void save(graphlab::oarchive& oarc) const { oarc << id << cost << launched << done; } void load(graphlab::iarchive& iarc) { iarc >> id >> cost >> launched >> done; } }; /* * PrestigeAnalysisNode * Graph Node class for running multiple djikstra tree algorithms simultaneously * Contains a map of node id's to DjikstraNode instances * bookkeeping components * */ class PrestigeAnalysisNode { public: std::map djikstra_pieces; double local_value; double total; long count; int edge_count; PrestigeAnalysisNode(){ local_value=0.0; total=0.0; count=0; edge_count=-1; } void save(graphlab::oarchive& oarc) const { oarc << djikstra_pieces << local_value << total << count << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> djikstra_pieces >> local_value >> total >> count >> edge_count; } }; /* * Gather class for the Djikstra algorithm. 
* id: node id of the incoming edge's other end * cost: shortest path cost at the time this node gathers its edges * edge_count: a count of gathered edges * */ class Gather { public: unsigned long id; double cost; int edge_count; Gather(){ id=0; cost=0.0; edge_count=1; } Gather& operator+=(const Gather& other){ if(other.id < 0){ return *this; } if(this->id < 0){ return *this; } if (cost <= other.cost){ this->edge_count++; return *this; } this->edge_count += other.edge_count; return *this; } void save(graphlab::oarchive& oarc) const { oarc << id << cost << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> id >> cost >> edge_count; } }; /* * GatherMultiTree * map of djisktra root id's to their asociated content for that tree * */ class GatherMultiTree { public: std::map content; int edge_count; GatherMultiTree(){ edge_count=0; } GatherMultiTree& operator+=(const GatherMultiTree& other){ return *this; } void save(graphlab::oarchive& oarc) const { oarc << content << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> content >> edge_count; } }; typedef PrestigeAnalysisNode vertex_data_type; typedef GatherMultiTree gather_type; // The graph type is determined by the vertex and edge data types typedef graphlab::distributed_graph graph_type; /* * Loads graphs in the form 'id (id edge_strength)*' * */ bool line_parser(graph_type& graph, const std::string& filename, const std::string& textline) { std::stringstream strm(textline); graphlab::vertex_id_type vid; // first entry in the line is a vertex ID strm >> vid; PrestigeAnalysisNode node; // insert this vertex with its label graph.add_vertex(vid, node); // while there are elements in the line, continue to read until we fail double edge_val=1.0; while(1){ graphlab::vertex_id_type other_vid; strm >> other_vid; strm >> edge_val; if (strm.fail()) break; graph.add_edge(vid, other_vid,edge_val); } return true; } /* * Algorithm class whose sole purpose is to reset launched and done booleans * for all id's in a 
PrestigeAnalysisNode */ class ClearBooleans : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { GatherMultiTree g; return g; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; vertex.data().djikstra_pieces[key].launched = false; vertex.data().djikstra_pieces[key].done = false; vertex.data().djikstra_pieces[key].cost = 0.0; } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { } }; /* * Djikstra Algorithm Class * * Starting from the starting nodes, create an id for this root and signal * all neighbors to start the calculations. Set launched when started, done * when all edges have been signaled. * * As a signal is receieved collect edges to determine if the best path has * changed. If it has, update. If the first signal is receieved, marked * the node as launched and then mark the node done after signaling neighbors. * * The process terminates when all nodes active have no neighbors that are not done. 
*/ class DjikstraAlgorithm : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { bool changed; public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::IN_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { Gather g; GatherMultiTree tree; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key=iter->first; if((edge.source().data().djikstra_pieces[key].launched == true)&& (edge.source().data().djikstra_pieces[key].done == false)){ double c = edge.data() + edge.source().data().djikstra_pieces[key].cost; g.cost = c; g.id = edge.source().data().djikstra_pieces[key].id; g.edge_count = 1; tree.content[key] = g; }else{ g.id=0; } } return tree; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces[key].launched == false){ vertex.data().djikstra_pieces[key].launched = true; vertex.data().edge_count = total.edge_count; if(vertex.data().djikstra_pieces[key].cost > total.content.find(key)->second.cost){ vertex.data().djikstra_pieces[key].cost = total.content.find(key)->second.cost; vertex.data().djikstra_pieces[key].id = total.content.find(key)->second.id; }else{ vertex.data().djikstra_pieces[key].done = true; } }else{ vertex.data().djikstra_pieces[key].done = true; } } for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces.find(key)==vertex.data().djikstra_pieces.end()){ vertex.data().djikstra_pieces[key].launched = true; vertex.data().edge_count = total.edge_count; vertex.data().djikstra_pieces[key].cost = 
total.content.find(key)->second.cost; vertex.data().djikstra_pieces[key].id = total.content.find(key)->second.id; } } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { // if vertex data changes, scatter to all edges. bool done = true; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces.find(key)->second.launched && !vertex.data().djikstra_pieces.find(key)->second.done){ done = false; } } if(!done){ return graphlab::OUT_EDGES; }else{ return graphlab::NO_EDGES; } } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if((vertex.data().djikstra_pieces.find(key)->second.done == false) && (vertex.data().djikstra_pieces.find(key)->second.launched == true)){ context.signal(edge.target()); } } } }; size_t num_vertices = 3000; size_t desired_vertices_count = 3000; size_t selected_vertices_count = 0; /* * For every node, print the previous node in its spanning tree for all spanning trees this node is in. * */ struct betweeness_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id(); double betweeness = 0.0; for(std::map::const_iterator iter = v.data().djikstra_pieces.begin(); iter != v.data().djikstra_pieces.end(); ++iter){ betweeness += iter->second.cost; } betweeness /= selected_vertices_count; strm << "\t" << betweeness << std::endl; return strm.str(); } std::string save_edge (graph_type::edge_type e) { return ""; } }; /* * Select ~3000 root nodes or an exact count which gives up around +/-3% accuracy * in prestige measures. It is a constant memory random selector. 
*/ bool selectVertices(const graph_type::vertex_type& vertex){ unsigned int r = random(); std::cout << "Random seed is " << r << std::endl; if(r < (desired_vertices_count * RAND_MAX / num_vertices)){ selected_vertices_count++; return true; } return false; } /* * Gather object that keeps track of betweeness counts for each spanning tree. * */ class BetweenessGather{ public: std::map counts; std::map edge_count; void save(graphlab::oarchive& oarc) const { oarc << counts << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> counts >> edge_count; } BetweenessGather& operator+=(const BetweenessGather& gather){ for(std::map::const_iterator iter = this->counts.begin(); iter != this->counts.end(); ++iter ){ long key = iter->first; this->counts[key] += gather.counts.find(key)->second; this->edge_count[key] += gather.edge_count.find(key)->second; } for(std::map::const_iterator iter = gather.counts.begin(); iter != gather.counts.end(); ++iter){ long key = iter->first; if(this->counts.find(key) != this->counts.end()){ this->counts[key] = gather.counts.find(key)->second; this->edge_count[key] = gather.edge_count.find(key)->second; } } return *this; } }; /* * Walk backwards from leaf nodes (those that have no nodes pointing to them in * the gather step). Each signals the node referenced in its internal spanning tree * record. This is performed simultaneously for each spanning tree in the set. * * The betweeness score is cached in the cost field. 
* */ class BetweenessAlgorithm : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { bool changed; public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::IN_EDGES; } BetweenessGather gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { BetweenessGather g; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key= iter->first; if(edge.target().data().djikstra_pieces[key].id == vertex.id()){ if(edge.source().data().djikstra_pieces[key].launched == true){ g.counts[key] = edge.source().data().djikstra_pieces[key].cost; g.edge_count[key] = 1; } } } return g; } void apply(icontext_type& context, vertex_type& vertex, const BetweenessGather& total) { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(total.edge_count.find(key)->second==0){ vertex.data().djikstra_pieces[key].launched = true; vertex.data().djikstra_pieces[key].cost = 0.0; } if((vertex.data().djikstra_pieces[key].launched == true)&& (vertex.data().djikstra_pieces[key].done == false)&& (((long)vertex.data().djikstra_pieces[key].cost)==total.edge_count.find(key)->second)){ vertex.data().djikstra_pieces[key].done = true; vertex.data().djikstra_pieces[key].cost = fmax(1.0,(double)total.edge_count.find(key)->second); } } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { // if vertex data changes, scatter to all edges. 
bool done = true; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces.find(key)->second.launched && !vertex.data().djikstra_pieces.find(key)->second.done){ done = false; } } if(!done){ return graphlab::OUT_EDGES; }else{ return graphlab::NO_EDGES; } } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if((vertex.data().djikstra_pieces.find(key)->second.done == false) && (vertex.data().djikstra_pieces.find(key)->second.launched == true)){ context.signal(edge.target()); } } } }; int main (int argc, char** argv){ // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("Betweeness Algorithm."); std::string graph_dir; clopts.attach_option("graph", graph_dir, "The graph file. Required "); clopts.add_positional("graph"); clopts.attach_option("samplesize", desired_vertices_count, "(Sample Size) Number of spanning trees to use"); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant betweness score to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } if (graph_dir == "") { dc.cout() << "Graph not specified. 
Cannot continue"; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc); dc.cout() << "Loading graph using line parser" << std::endl; graph.load(graph_dir, line_parser); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; graphlab::omni_engine engine(dc, graph, "asynchronous", clopts); num_vertices = graph.num_vertices(); graphlab::vertex_set start_set = graph.select(selectVertices); engine.signal_vset(start_set); engine.start(); const float runtime = engine.elapsed_seconds(); dc.cout() << "Finished Djikstra engine in " << runtime << " seconds." << std::endl; graphlab::omni_engine engine2(dc,graph,"asynchronous",clopts); engine2.signal_all(); engine2.start(); const float runtime2 = engine.elapsed_seconds(); dc.cout() << "Finished resetting graph engine in " << runtime2 << " seconds." << std::endl; graphlab::omni_engine engine3(dc,graph,"asynchronous",clopts); engine3.signal_all(); engine3.start(); const float runtime3 = engine.elapsed_seconds(); dc.cout() << "Finished Betweeness engine in " << runtime3 << " seconds." << std::endl; if (saveprefix != "") { graph.save(saveprefix, betweeness_writer(), false, // do not gzip true, //save vertices false); // do not save edges } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } ================================================ FILE: toolkits/graph_algorithms/closeness.cpp ================================================ /* * Copyright (c) 2014 Daniel McEnnis. * portions of main Copyright (c) 2009 Carnegie Mellon * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include #include /* * Djikstra Graph Node Class * * This class contains the information about a single graphlab node. * id - current best path's previous node id - next node on path to root * cost - current cost of the path to route by the current route: Note - this * can become inaccurate in the course of calculations and must be recalculated * by traversing the shortest path tree to get an accurate result. * launched - has execution of this node been sheduled * done - has execution of this node been completed */ class DjikstraNode { public: unsigned long id; double cost; bool launched; bool done; DjikstraNode(){ id = 0; cost = 1e100; launched = false; done=false; } void save(graphlab::oarchive& oarc) const { oarc << id << cost << launched << done; } void load(graphlab::iarchive& iarc) { iarc >> id >> cost >> launched >> done; } }; /* * PrestigeAnalysisNode * Graph Node class for running multiple djikstra tree algorithms simultaneously * Contains a map of node id's to DjikstraNode instances * bookkeeping components * */ class PrestigeAnalysisNode { public: std::map djikstra_pieces; double local_value; double total; long count; int edge_count; PrestigeAnalysisNode(){ local_value=0.0; total=0.0; count=0; edge_count=-1; } void save(graphlab::oarchive& oarc) const { oarc << djikstra_pieces << local_value << total << count << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> djikstra_pieces >> local_value >> total >> count >> edge_count; } }; /* * Gather class for the Djikstra algorithm. 
* id: node id of the incoming edge's other end * cost: shortest path cost at the time this node gathers its edges * edge_count: a count of gathered edges * */ class Gather { public: long id; double cost; int edge_count; Gather(){ id=0; cost=0.0; edge_count=1; } Gather& operator+=(const Gather& other){ if(other.id < 0){ return *this; } if(this->id < 0){ return *this; } if (cost <= other.cost){ this->edge_count++; return *this; } this->edge_count += other.edge_count; return *this; } void save(graphlab::oarchive& oarc) const { oarc << id << cost << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> id >> cost >> edge_count; } }; /* * GatherMultiTree * map of djisktra root id's to their asociated content for that tree * */ class GatherMultiTree { public: std::map content; int edge_count; GatherMultiTree(){ edge_count=0; } GatherMultiTree& operator+=(const GatherMultiTree& other){ return *this; } void save(graphlab::oarchive& oarc) const { oarc << content << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> content >> edge_count; } }; typedef PrestigeAnalysisNode vertex_data_type; typedef GatherMultiTree gather_type; // The graph type is determined by the vertex and edge data types typedef graphlab::distributed_graph graph_type; /* * Loads graphs in the form 'id (id edge_strength)*' * */ bool line_parser(graph_type& graph, const std::string& filename, const std::string& textline) { std::stringstream strm(textline); graphlab::vertex_id_type vid; // first entry in the line is a vertex ID strm >> vid; PrestigeAnalysisNode node; // insert this vertex with its label graph.add_vertex(vid, node); // while there are elements in the line, continue to read until we fail double edge_val=1.0; while(1){ graphlab::vertex_id_type other_vid; strm >> other_vid; strm >> edge_val; if (strm.fail()) break; graph.add_edge(vid, other_vid,edge_val); } return true; } /* * Algorithm class whose sole purpose is to reset launched and done booleans * for all id's in a 
PrestigeAnalysisNode */ class ClearBooleans : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { GatherMultiTree g; return g; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; vertex.data().djikstra_pieces[key].launched = false; vertex.data().djikstra_pieces[key].done = false; vertex.data().djikstra_pieces[key].cost = 0.0; } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { } }; /* * Djikstra Algorithm Class * * Starting from the starting nodes, create an id for this root and signal * all neighbors to start the calculations. Set launched when started, done * when all edges have been signaled. * * As a signal is receieved collect edges to determine if the best path has * changed. If it has, update. If the first signal is receieved, marked * the node as launched and then mark the node done after signaling neighbors. * * The process terminates when all nodes active have no neighbors that are not done. * * This differs from djikstra in that the edge directions are reversed. 
*/ class DjikstraAlgorithm : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { bool changed; public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::OUT_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { Gather g; GatherMultiTree tree; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key=iter->first; if((edge.source().data().djikstra_pieces[key].launched == true)&& (edge.source().data().djikstra_pieces[key].done == false)){ double c = edge.data() + edge.source().data().djikstra_pieces[key].cost; g.cost = c; g.id = edge.source().data().djikstra_pieces[key].id; g.edge_count = 1; tree.content[key] = g; }else{ g.id=0; } } return tree; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces[key].launched == false){ vertex.data().djikstra_pieces[key].launched = true; vertex.data().edge_count = total.edge_count; if(vertex.data().djikstra_pieces[key].cost > total.content.find(key)->second.cost){ vertex.data().djikstra_pieces[key].cost = total.content.find(key)->second.cost; vertex.data().djikstra_pieces[key].id = total.content.find(key)->second.id; }else{ vertex.data().djikstra_pieces[key].done = true; } }else{ vertex.data().djikstra_pieces[key].done = true; } } for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces.find(key)==vertex.data().djikstra_pieces.end()){ vertex.data().djikstra_pieces[key].launched = true; vertex.data().edge_count = total.edge_count; vertex.data().djikstra_pieces[key].cost = 
total.content.find(key)->second.cost; vertex.data().djikstra_pieces[key].id = total.content.find(key)->second.id; } } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { // if vertex data changes, scatter to all edges. bool done = true; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces.find(key)->second.launched && !vertex.data().djikstra_pieces.find(key)->second.done){ done = false; } } if(!done){ return graphlab::IN_EDGES; }else{ return graphlab::NO_EDGES; } } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if((vertex.data().djikstra_pieces.find(key)->second.done == false) && (vertex.data().djikstra_pieces.find(key)->second.launched == true)){ context.signal(edge.target()); } } } }; size_t num_vertices = 3000; size_t desired_sample_size = 3000; size_t selected_sample_size = 0; /* * For every node, print the previous node in its spanning tree for all spanning trees this node is in. * */ struct closeness_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "\t"; double value = 0.0; for(std::map::const_iterator iter = v.data().djikstra_pieces.begin(); iter != v.data().djikstra_pieces.end();++iter){ value += iter->second.cost; } value /= selected_sample_size; strm << value << std::endl; return strm.str(); } std::string save_edge (graph_type::edge_type e) { return ""; } }; /* * Select ~3000 root nodes or an exact count which gives up around +/-3% accuracy * in prestige measures. It is a constant memory random selector. 
*/ bool selectVertices(const graph_type::vertex_type& vertex){ unsigned int r = random(); std::cout << "Random seed is " << r << std::endl; if(r < (desired_sample_size * RAND_MAX / num_vertices)){ selected_sample_size++; return true; } return false; } /* * Collects the current shortest path cost for each spanning tree * in its map of spanning trees. The algorithm is responsible for * filtering out trees where the collecting node is not a node a step * further in the spanning tree. */ class ClosenessGather{ public: std::map counts; std::map edge_count; void save(graphlab::oarchive& oarc) const { oarc << counts << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> counts >> edge_count; } ClosenessGather& operator+=(const ClosenessGather& gather){ for(std::map::const_iterator iter = this->counts.begin(); iter != this->counts.end(); ++iter ){ long key = iter->first; this->counts[key] += gather.counts.find(key)->second; this->edge_count[key] += gather.edge_count.find(key)->second; } for(std::map::const_iterator iter = gather.counts.begin(); iter != gather.counts.end(); ++iter){ long key = iter->first; if(this->counts.find(key) != this->counts.end()){ this->counts[key] = gather.counts.find(key)->second; this->edge_count[key] = gather.edge_count.find(key)->second; } } return *this; } }; /* * For every spanning tree root node in the graph node map, start from the root * and walk back towards the leafs. Record the shortest path costs for every node * along the way, halting when no signaled node has a neighbor that is not done. * * This generates a random sample of the set of all spanning trees for the graph * of a about 3000 trees (or exact value if fewer than 3000 nodes) that gives the * closeness metric of each node to +/-3%. It is needed to sum the closeness for each * djikstra id to get the value without using the output function. 
* */ class ClosenessAlgorithm : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { bool changed; public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::OUT_EDGES; } ClosenessGather gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { ClosenessGather g; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key= iter->first; if(edge.target().data().djikstra_pieces[key].id == vertex.id()){ if(edge.source().data().djikstra_pieces[key].launched == true){ g.counts[key] = edge.source().data().djikstra_pieces[key].cost + edge.data(); g.edge_count[key] = 1; } } } return g; } void apply(icontext_type& context, vertex_type& vertex, const ClosenessGather& total) { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(total.edge_count.find(key)->second==0){ vertex.data().djikstra_pieces[key].launched = true; } if((vertex.data().djikstra_pieces[key].launched == true)&& (vertex.data().djikstra_pieces[key].done == false)&& (((long)vertex.data().djikstra_pieces[key].cost)==total.edge_count.find(key)->second)){ vertex.data().djikstra_pieces[key].done = true; vertex.data().djikstra_pieces[key].cost = (double)total.edge_count.find(key)->second; } } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { // if vertex data changes, scatter to all edges. 
bool done = true; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces.find(key)->second.launched && !vertex.data().djikstra_pieces.find(key)->second.done){ done = false; } } if(!done){ return graphlab::IN_EDGES; }else{ return graphlab::NO_EDGES; } } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if((vertex.data().djikstra_pieces.find(key)->second.done == false) && (vertex.data().djikstra_pieces.find(key)->second.launched == true)){ context.signal(edge.target()); } } } }; int main (int argc, char** argv){ // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("Closeness Algorithm"); std::string graph_dir; clopts.attach_option("graph", graph_dir, "The graph file. Required "); clopts.add_positional("graph"); clopts.attach_option("samplesize", desired_sample_size , "(Sample size) the number of spanning trees to calculate"); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant closeness score to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } if (graph_dir == "") { dc.cout() << "Graph not specified. 
Cannot continue"; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc); dc.cout() << "Loading graph using line parser" << std::endl; graph.load(graph_dir, line_parser); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; graphlab::omni_engine engine(dc, graph, "asynchronous", clopts); num_vertices = graph.num_vertices(); graphlab::vertex_set start_set = graph.select(selectVertices); engine.signal_vset(start_set); engine.start(); const float runtime = engine.elapsed_seconds(); dc.cout() << "Finished Djikstra engine in " << runtime << " seconds." << std::endl; graphlab::omni_engine engine2(dc,graph,"asynchronous",clopts); engine2.signal_all(); engine2.start(); const float runtime2 = engine.elapsed_seconds(); dc.cout() << "Finished resetting the graph in " << runtime2 << " seconds." << std::endl; graphlab::omni_engine engine3(dc,graph,"asynchronous",clopts); engine3.signal_vset(start_set); engine3.start(); const float runtime3 = engine.elapsed_seconds(); dc.cout() << "Finished the closeness engine in " << runtime3 << " seconds." << std::endl; if (saveprefix != "") { graph.save(saveprefix, closeness_writer(), false, // do not gzip true, //save vertices false); // do not save edges } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } ================================================ FILE: toolkits/graph_algorithms/djikstra.cpp ================================================ /* * Copyright (c) 2014 Daniel McEnnis. * portions of main Copyright (c) 2009 Carnegie Mellon * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include #include /* * Djikstra Graph Node Class * * This class contains the information about a single graphlab node. * id - current best path's previous node id - next node on path to root * cost - current cost of the path to route by the current route: Note - this * can become inaccurate in the course of calculations and must be recalculated * by traversing the shortest path tree to get an accurate result. * launched - has execution of this node been sheduled * done - has execution of this node been completed */ class DjikstraNode { public: long id; double cost; bool launched; bool done; DjikstraNode(){ id = 0; cost = 1e100; launched = false; done=false; } void save(graphlab::oarchive& oarc) const { oarc << id << cost << launched << done; } void load(graphlab::iarchive& iarc) { iarc >> id >> cost >> launched >> done; } }; /* * PrestigeAnalysisNode * Graph Node class for running multiple djikstra tree algorithms simultaneously * Contains a map of node id's to DjikstraNode instances * bookkeeping components * */ class PrestigeAnalysisNode { public: std::map djikstra_pieces; double local_value; double total; long count; int edge_count; PrestigeAnalysisNode(){ local_value=0.0; total=0.0; count=0; edge_count=-1; } void save(graphlab::oarchive& oarc) const { oarc << djikstra_pieces << local_value << total << count << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> djikstra_pieces >> local_value >> total >> count >> edge_count; } }; /* * Gather class for the Djikstra algorithm. 
* id: node id of the incoming edge's other end * cost: shortest path cost at the time this node gathers its edges * edge_count: a count of gathered edges * */ class Gather { public: unsigned long id; double cost; int edge_count; Gather(){ id=0; cost=0.0; edge_count=1; } Gather& operator+=(const Gather& other){ if(other.id < 0){ return *this; } if(this->id < 0){ return *this; } if (cost <= other.cost){ this->edge_count++; return *this; } this->edge_count += other.edge_count; return *this; } void save(graphlab::oarchive& oarc) const { oarc << id << cost << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> id >> cost >> edge_count; } }; /* * GatherMultiTree * map of djisktra root id's to their asociated content for that tree * */ class GatherMultiTree { public: std::map content; int edge_count; GatherMultiTree(){ edge_count=0; } GatherMultiTree& operator+=(const GatherMultiTree& other){ return *this; } void save(graphlab::oarchive& oarc) const { oarc << content << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> content >> edge_count; } }; // The vertex data is its label typedef PrestigeAnalysisNode vertex_data_type; typedef GatherMultiTree gather_type; // The graph type is determined by the vertex and edge data types typedef graphlab::distributed_graph graph_type; /* * Loads graphs in the form 'id (id edge_strength)*' * */ bool line_parser(graph_type& graph, const std::string& filename, const std::string& textline) { std::stringstream strm(textline); graphlab::vertex_id_type vid; // first entry in the line is a vertex ID strm >> vid; PrestigeAnalysisNode node; // insert this vertex with its label graph.add_vertex(vid, node); // while there are elements in the line, continue to read until we fail double edge_val=1.0; while(1){ graphlab::vertex_id_type other_vid; strm >> other_vid; strm >> edge_val; if (strm.fail()) break; graph.add_edge(vid, other_vid,edge_val); } return true; } /* * Algorithm class whose sole purpose is to reset launched and done 
booleans * for all id's in a PrestigeAnalysisNode */ class ClearBooleans : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { GatherMultiTree g; return g; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; vertex.data().djikstra_pieces[key].launched = false; vertex.data().djikstra_pieces[key].done = false; vertex.data().djikstra_pieces[key].cost = 0.0; } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { } }; /* * Djikstra Algorithm Class * * Starting from the starting nodes, create an id for this root and signal * all neighbors to start the calculations. Set launched when started, done * when all edges have been signaled. * * As a signal is receieved collect edges to determine if the best path has * changed. If it has, update. If the first signal is receieved, marked * the node as launched and then mark the node done after signaling neighbors. * * The process terminates when all nodes active have no neighbors that are not done. 
*/ class DjikstraAlgorithm : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { bool changed; public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::IN_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { Gather g; GatherMultiTree tree; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key=iter->first; if((edge.source().data().djikstra_pieces[key].launched == true)&& (edge.source().data().djikstra_pieces[key].done == false)){ double c = edge.data() + edge.source().data().djikstra_pieces[key].cost; g.cost = c; g.id = edge.source().data().djikstra_pieces[key].id; g.edge_count = 1; tree.content[key] = g; }else{ g.id=0; } } return tree; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces[key].launched == false){ vertex.data().djikstra_pieces[key].launched = true; vertex.data().edge_count = total.edge_count; if(vertex.data().djikstra_pieces[key].cost > total.content.find(key)->second.cost){ vertex.data().djikstra_pieces[key].cost = total.content.find(key)->second.cost; vertex.data().djikstra_pieces[key].id = total.content.find(key)->second.id; }else{ vertex.data().djikstra_pieces[key].done = true; } }else{ vertex.data().djikstra_pieces[key].done = true; } } for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces.find(key)==vertex.data().djikstra_pieces.end()){ vertex.data().djikstra_pieces[key].launched = true; vertex.data().edge_count = total.edge_count; vertex.data().djikstra_pieces[key].cost = 
total.content.find(key)->second.cost; vertex.data().djikstra_pieces[key].id = total.content.find(key)->second.id; } } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { // if vertex data changes, scatter to all edges. bool done = true; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces.find(key)->second.launched && !vertex.data().djikstra_pieces.find(key)->second.done){ done = false; } } if(!done){ return graphlab::OUT_EDGES; }else{ return graphlab::NO_EDGES; } } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if((vertex.data().djikstra_pieces.find(key)->second.done == false) && (vertex.data().djikstra_pieces.find(key)->second.launched == true)){ context.signal(edge.target()); } } } }; /* * For every node, print the previous node in its spanning tree for all spanning trees this node is in. * */ struct djikstra_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id(); for(std::map::const_iterator iter = v.data().djikstra_pieces.begin(); iter != v.data().djikstra_pieces.end(); ++iter){ long key = iter->first; strm << "\t" << key << "\t" << iter->second.id << std::endl; } return strm.str(); } std::string save_edge (graph_type::edge_type e) { return ""; } }; size_t num_vertices = 3000; size_t desired_vertices_count = 3000; size_t selected_vertices_count = 0; /* * Select ~3000 root nodes or an exact count which gives up around +/-3% accuracy * in prestige measures. It is a constant memory random selector. 
*/ bool selectVertices(const graph_type::vertex_type& vertex){ unsigned int r = random(); std::cout << "Random seed is " << r << std::endl; if(r < ((desired_vertices_count * RAND_MAX) / num_vertices)){ selected_vertices_count++; return true; } return false; } int main(int argc, char** argv) { // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("Djikstra Algorithm."); std::string graph_dir; clopts.attach_option("graph", graph_dir, "The graph file. Required "); clopts.add_positional("graph"); clopts.attach_option("samplesize", desired_vertices_count, "Target number of simultaneous spanning trees"); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the spanning trees to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } if (graph_dir == "") { dc.cout() << "Graph not specified. Cannot continue"; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc); dc.cout() << "Loading graph using line parser" << std::endl; graph.load(graph_dir, line_parser); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; // Algorithm for creating the spanning trees graphlab::omni_engine engine(dc, graph, "asynchronous", clopts); num_vertices = graph.num_vertices(); // create the total number of djisktra spanning trees to create at once. graphlab::vertex_set start_set = graph.select(selectVertices); engine.signal_vset(start_set); engine.start(); const float runtime = engine.elapsed_seconds(); dc.cout() << "Finished Running engine in " << runtime << " seconds." 
<< std::endl; if (saveprefix != "") { graph.save(saveprefix, djikstra_writer(), false, // do not gzip true, //save vertices false); // do not save edges } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } ================================================ FILE: toolkits/graph_algorithms/graph_analytics.dox ================================================ /** \page graph_analytics Graph Analytics \brief The graph analytics toolkit contains applications for performing graph analytics and extracting patterns from the graph structure mostly from the Social Network Analysis Toolkit The toolkit current contains: - \ref djikstra "Djisktra Algorithm Base" - \ref betweeness "Betweeness Algorithm" - \ref closeness "Closeness Algorithm" - \ref prestige "Prestge Algoritm" All toolkits take any of the graph formats described in \ref graph_formats . \section djikstra "Djikstra Algorithm Base" The input format for the djikstra algorithm is \verbatim [ ]* \endverbatim The output format of the djikstra algorithm is \verbatim [ ]* \endverbatim roots do not list themselves Run this command with: \verbatim mpiexec -n --hostfile ./djikstra --graph [--saveprefix ] \endverbatim The output describes the spanning trees constructed starting at up to 3000 nodes \subsection djikstra_imp "Djikstra Algorithm Details" Djikstra starts with a randomly selected (~3000) set of nodes using a constant time and space random process. Each gather collects the earlier spanning tree members. The apply step calculates the best path, storing it. The scatter step notifies all nodes other than nodes that have notifid it to execute now. This process runs concurrently for each starting node. Note: this algorithm does not preserve correctness of the total cost at each node, only relative stregth between node choices. Re-walk the spanning tree to calculate this (see below for examples). 
\section betweeness "Betweeness Algorithms" The input format for the betweeness algorithm is: \verbatim [ ]* \endverbatim The output format of the betweeness algorithm is \verbatim \endverbatim Run this command with: \verbatim mpiexec -n --hostfile ./betweeness --graph [--saveprefix ] \endverbatim The output estimates betweeness using ~3000 randomly selected spanning trees (typically +/-3% accuracy in the measure for each node.) \subsection betweeness_imp "Betweeness Algorithm Details" Djikstra sanning trees are calculated first, then the datsa structure is reset, then betweeness scores are calculated by walking the spanning trees from leaves to roots. Finally, the betweeness scores are collated from the various samples of spanning trees. See \ref djiksra_imp for details on how spanning trees are calculated. The next step resets all the bookkeeping on the spanning trees and sets costs to zero. Finally, the betweeness is calculated. All nodes are started at first, but only nodes without another spanning tree node pointing to it have a non-null execution. The gather step checks if all nodes in the spanning tree pointing to it have been calculated yet, silently skipping if this is not true. Otherwise, the betweeness scores are collected. The apply step sums the betweeness score for this node and spanning tree. The scatter step signals the next higher node in this spanning tree that a new betweeness score is ready. When the graph is saved, it outputs the sum of all betweeness scores across all calculated spanning trees and estimates the expected final betweeness score. 
\section closeness "Closeness Algorithm" The input format for the closeness algorithm is: \verbatim [ ]* \endverbatim The output format of the betweeness algorithm is \verbatim \endverbatim Run this command with: \verbatim mpiexec -n --hostfile ./closeness --graph [--saveprefix ] \endverbatim The output estimates closeness using ~3000 randomly selected spanning trees (typically +/-3% accuracy in the measure for each node.) \subsection closeness_imp "Closeness Algorithm Details" Djikstra spanning trees are calculated first, then the datsa structure is reset, then closeness scores are calculated by walking the spanning trees from leaves to roots. Finally, the closeness scores are collated from the various samples of spanning trees. See \ref djiksra_imp for details on how spanning trees are calculated, except the link direction is reversed. The next step resets all the bookkeeping on the spanning trees and sets costs to zero. Finally, the closeness is calculated. The starting node set is reused, and the spanning trees are all walked simultaneously from root to leaves. The gather step collects the parent closeness score. The apply step combines the parent's closeness score with the edge value and stores it. The scatter step signals al child nodes. When the graph is saved, it outputs the sum of all closeness scores across all calculated spanning trees and estimates the expected final closeness score. \section prestige "Prestige Algorithm" The input format for the prestige algorithm is: \verbatim [ ]* \endverbatim The output format of the betweeness algorithm is \verbatim \endverbatim Run this command with: \verbatim mpiexec -n --hostfile ./prestige --graph [--saveprefix ] \endverbatim The output estimates prestige using ~3000 randomly selected spanning trees (typically +/-3% accuracy in the measure for each node.) 
\subsection prestige_imp "Prestige Algorithm Details" Djikstra spanning trees are calculated first, then the datsa structure is reset, then prestige scores are calculated by walking the spanning trees from leaves to roots. Finally, the prestige scores are collated from the various samples of spanning trees. See \ref djiksra_imp for details on how spanning trees are calculated. The next step resets all the bookkeeping on the spanning trees and sets costs to zero. Finally, the prestige is calculated. The starting node set is reused, and the spanning trees are all walked simultaneously from root to leaves. The gather step collects the parent prestige score. The apply step combines the parent's prestige score with the edge value and stores it. The scatter step signals al child nodes. When the graph is saved, it outputs the sum of all prestige scores across all calculated spanning trees and estimates the expected final prestige score. \section graph_analytics_pagerank PageRank The PageRank program computes the pagerank of each vertex. See the Wikipedia article for details of the algorithm. \subsection Input Graph The input to the system is a graph in any of the Portable graph format described in \ref graph_formats. \verbatim > ./pagerank --graph=[graph prefix] --format=[format] \endverbatim Alternatively, a synthetic power law graph of an arbitrary number of vertices can be generated using: \verbatim > ./pagerank --powerlaw=[nvertices] \endverbatim The resultant graph will have powerlaw out-degree, and nearly constant in-degree. The actual generation process draws vertex degree from a truncated power-law distribution with alpha=2.1. The distribution is truncated at maximum out-degree 100M to avoid allocating massive amounts of memory for creating the sampling distribution. \subsection Computation Type There are several modes of computation that are supported. All will eventually obtain the same solutions. 
### Classical To get classical PageRank iterations, adding the option \verbatim > --iterations=[N Iterations] \endverbatim ### Dynamic Synchronous (default) The dynamic synchronous computation only performs computation on vertices that have not yet converged to the desired tolerance. The default tolerance is 0.001. This can be modified by adding the option \verbatim > --tol=[tolerance] \endverbatim ### Dynamic Asynchronous The dynamic asynchronous computation only performs computation on vertices that have not yet converged to the desired tolerance. This uses the asynchronous engine. The default tolerance is 0.001. This can be modified by adding the option \verbatim > --tol=[tolerance] \endverbatim \note This is known to be slow! PageRank does not benefit from the consistency guaranteed by the asynchronous engine. A new engine is in development with weaker consistency semantics, but sufficient for pagerank. \subsection Output To save the resultant pagerank of each vertex, include the option \verbatim > --saveprefix=[output prefix] \endverbatim Tne output prefix is where the output counts will be written. This may be located on HDFS. For instance, if the output_prefix is "v_out", the output files will be written to: \verbatim v_out_1_of_16 v_out_2_of_16 ... v_out_16_of_16 \endverbatim Each line in the output file contains two numbers: a Vertex ID, and the computed PageRank. Note that the output vector is NOT normalized, namely computed entries do not sum into one. This program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./pagerank .... \endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \subsection Options Relevant options are: \li \b --graph (Optional). 
The prefix from which to load the graph data \li \b --format (Optional). The format of the input graph \li \b --powerlaw (Optional. Default 0). If set, generates synthetic powerlaw graph with the specified number of vertices. \li \b --saveprefix (Optional. Default ""). If set, will write the output counts. \li \b --tol (Optional. Default=1E-3). Changes the convergence tolerance for the Dynamic computation modes. \li \b --iterations (Optional. Default 0). If set, runs classical PageRank iterations for the specified number of iterations. \li \b -–graph_opts (Optional, Default empty) Any additional graph options. See graphlab::distributed_graph a list of options. \li \b --ncpus (Optional. Default 2) The number of processors that will be used for computation. \li \b -–engine (Optional, Default "synchronous") Sets the engine type. Must be either "synchronous" or "asynchronous" \li \b -–engine (Optional, Default "synchronous") Sets the engine options. Available options depend on the engine type. See graphlab::async_consistent_engine and graphlab::synchronous_engine for details. \section graph_analytics_kcore KCore Decomposition This program iteratively finds the KCore of the network. \subsection Input Graph The input to the system is a graph in any of the Portable graph format described in \ref graph_formats. 
\verbatim > ./kcore --graph=[graph prefix] --format=[format] \endverbatim Output may look like: \verbatim K=0: #V = 875713 #E = 4322051 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 0 K=1: #V = 875713 #E = 4322051 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 153407 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=2: #V = 711870 #E = 4160100 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 108715 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=3: #V = 581712 #E = 3915291 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 69907 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=4: #V = 492655 #E = 3668104 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 52123 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=5: #V = 424155 #E = 3416251 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 41269 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=6: #V = 367361 #E = 3158776 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 33444 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=7: #V = 319194 #E = 2902138 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 29201 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=8: #V = 274457 #E = 2629033 ...... \endverbatim To just get the informative lines: \verbatim > ./kcore --graph=[graph prefix] --format=[format] > k_out.txt ... 
> cat k_out.txt Computes a k-core decomposition of a graph. Number of vertices: 875713 Number of edges: 4322051 K=0: #V = 875713 #E = 4322051 K=1: #V = 875713 #E = 4322051 K=2: #V = 711870 #E = 4160100 K=3: #V = 581712 #E = 3915291 K=4: #V = 492655 #E = 3668104 K=5: #V = 424155 #E = 3416251 K=6: #V = 367361 #E = 3158776 K=7: #V = 319194 #E = 2902138 K=8: #V = 274457 #E = 2629033 K=9: #V = 231775 #E = 2335154 K=10: #V = 193406 #E = 2040738 K=11: #V = 159020 #E = 1753273 K=12: #V = 131362 #E = 1500517 K=13: #V = 106572 #E = 1256952 K=14: #V = 86302 #E = 1047053 K=15: #V = 68409 #E = 849471 K=16: #V = 53459 #E = 676076 K=17: #V = 40488 #E = 519077 ... \endverbatim The program can also save a copy of the graph at each stage by adding an option. \verbatim > --savecores=[prefix] \endverbatim The resultant graphs will be saved with prefixes [prefix].K For instance if prefix is out, The 0-Core graph may be saved in \verbatim out.0.1_of_4 out.0.2_of_4 out.0.3_of_4 out.0.4_of_4 \endverbatim The 5-Core graph will be saved in \verbatim out.5.1_of_4 out.5.2_of_4 out.5.3_of_4 out.5.4_of_4 \endverbatim and so on. The range of k-Core graphs to compute can be controlled by the kmin and the kmax option described below. This program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./kcore.... \endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \subsection Options Relevant options are: \li \b --graph (Required). The prefix from which to load the graph data \li \b --format (Required). The format of the input graph \li \b --ncpus (Optional. Default 2) The number of processors that will be used for computation. \li \b --savecores (Optional. Default ""). The target prefix to save the resultant K-core graphs. \li \b --kmin (Optional. Default 0). 
Only output result for the K-core graph starting at K=kmin \li \b --kmax (Optional. Default Inf). Only output result for the K-core graph up to K=kmax \section graph_analytics_triangle_coloring Graph Coloring The graph coloring program implements a really simple graph coloring procedure: each vertex reads the colors of its neighbors and takes on the smallest possible color which does not conflict with its neighbors. The procedure necessarily uses the asynchronous engine (it will never converge with the synchronous engine). The input to the system is a graph in any of the Portable graph format described in \ref graph_formats. It is important that the input be "cleaned" and that reverse edges are removed: i.e. if edge 1-->5 exists, edge 5-->1 should not exist. (The program will run without these edge removed. But numbers may be erroneous). To color a graph, the minimal set of options required are: \verbatim > ./simple_coloring --graph=[graph prefix] --format=[format] --output=[output prefix] \endverbatim Output looks like: \verbatim Number of vertices: 875713 Number of edges: 5105039 Coloring... Completed Tasks: 875713 Issued Tasks: 875713 Blocked Issues: 0 ------------------ Joined Tasks: 0 Colored in 42.3684 seconds Metrics server stopping. \endverbatim Observe that the number of Completed Tasks is identical to the number of vertices. This is a result of the consistency model which ensures that the entire vertex update is peformed "atomically". Tne output prefix is where the output counts will be written. This may be located on HDFS. For instance, if the output_prefix is "v_out", the output files will be written to: \verbatim v_out_1_of_16 v_out_2_of_16 ... v_out_16_of_16 \endverbatim Each line in the output file contains two numbers: a Vertex ID, and the number color of the vertex. This program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./simple_coloring .... 
\endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \subsection Options Relevant options are: \li \b --graph (Required). The prefix from which to load the graph data \li \b --format (Required). The format of the input graph \li \b --ncpus (Optional. Default 2) The number of processors that will be used for computation. \li \b –-graph_opts (Optional, Default empty) Any additional graph options. See --graph_help a list of options. \li \b –-engine_opts (Optional, Default empty) Any additional engine options. See --engine_help a list of options. A particularly relevant option is \verbatim --engine_opts="factorized=true" \endverbatim This uses a weaker consistency setting which only guarantees that individual "gather/apply/scatter" operations are atomic, but does not guarantee atomicity of the entire update. As a result, this may require more updates to complete, but could in practice run significantly faster. \section graph_analytics_connected_component Connected Component The connected component program can find all connected components in a graph, and can also count the number of vertices (size) of each connected component. The input to the system is a graph in any of the Portable Graph formats described in \ref graph_formats. 
To find connected components in a graph, the minimal set of options required are: \verbatim > ./connected_component --graph=[graph prefix] --format=[format] \endverbatim Here is a toy example, graph with 6 nodes and 5 edges: \verbatim # example graph # vertices: 6 edges: 5 1 2 2 3 4 5 4 6 5 6 \endverbatim Assume file name is toy_graph, the command used for running connected components is \verbatim > ./connected_component --graph=toy_graph --format=tsv --saveprefix=out \endverbatim When you set --saveprefix=output_prefix, the pairs of a Vertex ID and a Component ID will be written to a sequence of files with prefix output_prefix. This may be located on HDFS. For instance, if the output_prefix is "v_out", the output files will be written to: \verbatim out_1_of_4 out_2_of_4 out_3_of_4 out_4_of_4 \endverbatim Let's examine the output. The first column is the node id, while the second column is its assigned component number (which is also the lowest node id in this component). In our case: \verbatim 1,1 2,1 3,1 4,4 5,4 6,4 \endverbatim There are two components. The first component is 1,2,3 and the second component is 4,5,6 Note that this program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./connected_component .... \endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \subsection Options Relevant options are: \li \b --graph (Required). The prefix from which to load the graph data \li \b --format (Required). The format of the input graph \li \b --saveprefix (Optional). If set, pairs of a Vertex ID and a Component ID will be saved to a sequence of files with the given prefix. \li \b --ncpus (Optional. Default 2). The number of processors that will be used for computation. \li \b --graph_opts (Optional, Default empty). 
Any additional graph options. See graphlab::distributed_graph a list of options. connected_components_stats is a helper utility, which computes histogram of component sizes. Using our toy example \verbatim > ./connected_component_stats --graph=out Connected Component INFO: mpi_tools.hpp(init:63): MPI Support was not compiled. INFO: dc.cpp(init:573): Cluster of 1 instances created. INFO: distributed_graph.hpp(set_ingress_method:3200): Automatically determine ingress method: grid Loading graph in format: adj INFO: distributed_graph.hpp(load_from_posixfs:2189): Loading graph from file: ./out_1_of_4 INFO: distributed_graph.hpp(load_from_posixfs:2189): Loading graph from file: ./out_2_of_4 INFO: distributed_graph.hpp(load_from_posixfs:2189): Loading graph from file: ./out_3_of_4 INFO: distributed_graph.hpp(load_from_posixfs:2189): Loading graph from file: ./out_4_of_4 INFO: distributed_ingress_base.hpp(finalize:185): Finalizing Graph... INFO: distributed_ingress_base.hpp(exchange_global_info:519): Graph info: nverts: 2 nedges: 0 nreplicas: 2 replication factor: 1 Complete Finalization in 0.001965 graph calculation time is 2.4e-05 sec RESULT: size count 3 2 \endverbatim As expected, there are two components of size 3. \section graph_analytics_approximate_diameter Approximate Diameter The approximate diameter program can estimate a diameter of a graph. The implemented algorithm is based on the work, U Kang, Charalampos Tsourakakis, Ana Paula Appel, Christos Faloutsos and Jure Leskovec, HADI: Fast Diameter Estimation and Mining in Massive Graphs with Hadoop (2008). The input to the system is a graph in any of the Portable Graph formats described in \ref graph_formats. 
To compute an approximate diameter of a graph, the minimal set of options required are: \verbatim > ./approximate_diameter --graph=[graph prefix] --format=[format] \endverbatim Output looks like: \verbatim Approximate graph diameter INFO: synchronous_engine.hpp(start:1263): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1312): Active vertices: 1271950 INFO: synchronous_engine.hpp(start:1361): Running Aggregators 1-th hop: 12895307 vertex pairs are reached INFO: synchronous_engine.hpp(start:1263): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1312): Active vertices: 1271950 INFO: synchronous_engine.hpp(start:1361): Running Aggregators 2-th hop: 319726269 vertex pairs are reached INFO: synchronous_engine.hpp(start:1263): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1312): Active vertices: 1271950 INFO: synchronous_engine.hpp(start:1361): Running Aggregators 3-th hop: 319769151 vertex pairs are reached converge graph calculation time is 40 sec approximate diameter is 2 \endverbatim This program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./approximate_diameter .... \endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \subsection Options Relevant options are: \li \b --graph (Required). The prefix from which to load the graph data \li \b --format (Required). The format of the input graph \li \b --tol (Optional. Default=1E-4). Changes the convergence tolerance for the number of reached vertex pairs at each hop. \li \b --use-sketch (Optional. Default=1). If true, will use Flajolet & Martin bitmask to approximately count numbers of reached vertex pairs, and will require a smaller memory. If false, will count exact numbers of reached vertex pairs. 
But this will need a huge memory and be slow. \li \b --ncpus (Optional. Default 2). The number of processors that will be used for computation. \li \b --graph_opts (Optional, Default empty). Any additional graph options. See graphlab::distributed_graph a list of options. \section graph_analytics_partitioning Graph Partitioning This program can partition a graph by using normalized cut. The input to the system is a graph in any of the Portable Graph formats described in \ref graph_formats. You can also give weights to edges with the weight format. For instance in this weight format file, there are 5 edges: \verbatim 1 2 4.0 2 3 1.0 3 4 5.0 4 5 2.0 5 3 3.0 \endverbatim To partition a graph, the minimal set of options required are: \verbatim > ./partitioning --graph=[graph prefix] --format=[format] \endverbatim This program uses svd in Graphlab Collaborative Filtering Toolkit and kmeans in Graphlab Clustering Toolkit. The paths to the directories are specified by --svd-dir and --kmeans-dir, respectively. The program will create some intermediate files. The final partitioning result is written in files named [graph prefix].result with suffix, for example [graph prefix].result_1_of_4. The partitioning result data consists of two columns: one for the ids and the other for the assigned partitions. For instance: \verbatim 1 0 2 0 3 1 4 1 5 1 \endverbatim NOTE: To run this program in a distributed setting, you must use the "mpi-args" option, not like other graphlab toolkits. The graph partitioning calls other graphlab programs. When "--mpi-args" is set, these graphlab programs are called with "mpiexec" and the string written after the "mpi-args" option. For example, if you set --mpi-args="-n 4 --hostfile host", the program calls the other graphlab programs with "mpiexec -n 4 --hostfile host". \subsection Options Relevant options are: \li \b --graph (Required). The prefix from which to load the graph data \li \b --format (Required). The format of the input graph. 
If "weight" is set, the program will read the data file where each line holds [id1] [id2] [weight]. \li \b --partitions (Optional. Default 2). The number of partitions \li \b --svd-dir (Optional. Default ../collaborative_filtering/). Path to the directory where Graphlab svd is located \li \b --kmeans-dir (Optional. Default ../clustering/). Path to the directory where Graphlab kmeans is located \li \b --ncpus (Optional. Default 2). The number of processors that will be used for computation. \li \b --graph_opts (Optional, Default empty). Any additional graph options. See graphlab::distributed_graph a list of options. \li \b --mpi-args (Optional, Default empty). If set, will execute mipexec with the given string. \section graph_analytics_total_subgraph_centrality "Total Subgraph Centrality" Total subgraph centrality was implemented by Jacob Kesinger, see additional details in his blog post. Total Subgraph Communicability is a new centrality measure due to Benzi&Klymco [1]. For a directed graph with adjacenty matrix A, \verbatim TSC_i = sum_j exp(A)_{ij} = (exp(A)*1)_i. \endverbatim This code calculates the TSC using an Arnoldi iteration on the Krylov subspace {b, Ab,A*Ab, A*A*Ab, ...} due to Saad[1], and using the new warp engine from Graphlab 2.2 (without which this would have been, at best, very challenging). Small components of large graphs will have bogus answers due to floating point issues. To find the exact TSC for a particular node i, run with "--column i" to find exp(A)*e_i; you will have to sum the resulting output yourself, however. SAMPLE INPUT: \verbatim 0 1 1 2 1 3 2 4 3 4 1 0 2 1 3 1 4 2 4 3 \endverbatim OUTPUT: \verbatim 0 5.17784 1 10.3319 2 8.49789 3 8.49789 4 7.96807 \endverbatim You can verify this in python as: \verbatim import scipy import scipy.linalg A = scipy.array([[0,1,0,0,0],[1,0,1,1,0],[0,1,0,0,1],[0,1,0,0,1],[0,0,1,1,0]]) scipy.linalg.expm2(A).sum(axis=1) \endverbatim [1]: Benzi, Michele, and Christine Klymko. 
Total Communicability as a Centrality Measure. ArXiv e-print, February 27, 2013. arxiv [2]: Saad, Yousef. “Analysis of Some Krylov Subspace Approximations to the Matrix Exponential Operator.” SIAM Journal on Numerical Analysis 29, no. 1 (1992): 209–228. */ ================================================ FILE: toolkits/graph_algorithms/prestige.cpp ================================================ /* * Copyright (c) 2014 Daniel McEnnis. * portions of main Copyright (c) 2009 Carnegie Mellon * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include #include /* * Djikstra Graph Node Class * * This class contains the information about a single graphlab node. * id - current best path's previous node id - next node on path to root * cost - current cost of the path to route by the current route: Note - this * can become inaccurate in the course of calculations and must be recalculated * by traversing the shortest path tree to get an accurate result. 
* launched - has execution of this node been sheduled * done - has execution of this node been completed */ class DjikstraNode { public: unsigned long id; double cost; bool launched; bool done; DjikstraNode(){ id = 0; cost = 1e100; launched = false; done=false; } void save(graphlab::oarchive& oarc) const { oarc << id << cost << launched << done; } void load(graphlab::iarchive& iarc) { iarc >> id >> cost >> launched >> done; } }; /* * PrestigeAnalysisNode * Graph Node class for running multiple djikstra tree algorithms simultaneously * Contains a map of node id's to DjikstraNode instances * bookkeeping components * */ class PrestigeAnalysisNode { public: std::map djikstra_pieces; double local_value; double total; long count; int edge_count; PrestigeAnalysisNode(){ local_value=0.0; total=0.0; count=0; edge_count=-1; } void save(graphlab::oarchive& oarc) const { oarc << djikstra_pieces << local_value << total << count << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> djikstra_pieces >> local_value >> total >> count >> edge_count; } }; /* * Gather class for the Djikstra algorithm. 
* id: node id of the incoming edge's other end * cost: shortest path cost at the time this node gathers its edges * edge_count: a count of gathered edges * */ class Gather { public: long id; double cost; int edge_count; Gather(){ id=0; cost=0.0; edge_count=1; } Gather& operator+=(const Gather& other){ if(other.id < 0){ return *this; } if(this->id < 0){ return *this; } if (cost <= other.cost){ this->edge_count++; return *this; } this->edge_count += other.edge_count; return *this; } void save(graphlab::oarchive& oarc) const { oarc << id << cost << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> id >> cost >> edge_count; } }; /* * GatherMultiTree * map of djisktra root id's to their asociated content for that tree * */ class GatherMultiTree { public: std::map content; int edge_count; GatherMultiTree(){ edge_count=0; } GatherMultiTree& operator+=(const GatherMultiTree& other){ return *this; } void save(graphlab::oarchive& oarc) const { oarc << content << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> content >> edge_count; } }; typedef PrestigeAnalysisNode vertex_data_type; typedef GatherMultiTree gather_type; // The graph type is determined by the vertex and edge data types typedef graphlab::distributed_graph graph_type; /* * Loads graphs in the form 'id (id edge_strength)*' * */ bool line_parser(graph_type& graph, const std::string& filename, const std::string& textline) { std::stringstream strm(textline); graphlab::vertex_id_type vid; // first entry in the line is a vertex ID strm >> vid; PrestigeAnalysisNode node; // insert this vertex with its label graph.add_vertex(vid, node); // while there are elements in the line, continue to read until we fail double edge_val=1.0; while(1){ graphlab::vertex_id_type other_vid; strm >> other_vid; strm >> edge_val; if (strm.fail()) break; graph.add_edge(vid, other_vid,edge_val); } return true; } /* * Algorithm class whose sole purpose is to reset launched and done booleans * for all id's in a 
PrestigeAnalysisNode */ class ClearBooleans : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { GatherMultiTree g; return g; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; vertex.data().djikstra_pieces[key].launched = false; vertex.data().djikstra_pieces[key].done = false; vertex.data().djikstra_pieces[key].cost = 0.0; } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { } }; /* * Djikstra Algorithm Class * * Starting from the starting nodes, create an id for this root and signal * all neighbors to start the calculations. Set launched when started, done * when all edges have been signaled. * * As a signal is receieved collect edges to determine if the best path has * changed. If it has, update. If the first signal is receieved, marked * the node as launched and then mark the node done after signaling neighbors. * * The process terminates when all nodes active have no neighbors that are not done. 
*/ class DjikstraAlgorithm : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { bool changed; public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::IN_EDGES; } gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { Gather g; GatherMultiTree tree; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key=iter->first; if((edge.source().data().djikstra_pieces[key].launched == true)&& (edge.source().data().djikstra_pieces[key].done == false)){ double c = edge.data() + edge.source().data().djikstra_pieces[key].cost; g.cost = c; g.id = edge.source().data().djikstra_pieces[key].id; g.edge_count = 1; tree.content[key] = g; }else{ g.id=0; } } return tree; } void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces[key].launched == false){ vertex.data().djikstra_pieces[key].launched = true; vertex.data().edge_count = total.edge_count; if(vertex.data().djikstra_pieces[key].cost > total.content.find(key)->second.cost){ vertex.data().djikstra_pieces[key].cost = total.content.find(key)->second.cost; vertex.data().djikstra_pieces[key].id = total.content.find(key)->second.id; }else{ vertex.data().djikstra_pieces[key].done = true; } }else{ vertex.data().djikstra_pieces[key].done = true; } } for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces.find(key)==vertex.data().djikstra_pieces.end()){ vertex.data().djikstra_pieces[key].launched = true; vertex.data().edge_count = total.edge_count; vertex.data().djikstra_pieces[key].cost = 
total.content.find(key)->second.cost; vertex.data().djikstra_pieces[key].id = total.content.find(key)->second.id; } } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { // if vertex data changes, scatter to all edges. bool done = true; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces.find(key)->second.launched && !vertex.data().djikstra_pieces.find(key)->second.done){ done = false; } } if(!done){ return graphlab::OUT_EDGES; }else{ return graphlab::NO_EDGES; } } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if((vertex.data().djikstra_pieces.find(key)->second.done == false) && (vertex.data().djikstra_pieces.find(key)->second.launched == true)){ context.signal(edge.target()); } } } }; size_t num_vertices = 3000; size_t desired_sample_size = 3000; size_t selected_sample_size = 0; /* * For every node, print the previous node in its spanning tree for all spanning trees this node is in. * */ struct djikstra_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "\t"; double value = 0.0; for(std::map::const_iterator iter = v.data().djikstra_pieces.begin(); iter != v.data().djikstra_pieces.end();++iter){ value += iter->second.cost; } value /= selected_sample_size; strm << value << std::endl; return strm.str(); } std::string save_edge (graph_type::edge_type e) { return ""; } }; /* * Select ~3000 root nodes or an exact count which gives up around +/-3% accuracy * in prestige measures. It is a constant memory random selector. 
*/ bool selectVertices(const graph_type::vertex_type& vertex){ unsigned int r = random(); // std::cout << "Random seed is " << r << std::endl; if(r < (desired_sample_size * RAND_MAX / num_vertices)){ selected_sample_size++; return true; } return false; } /* * Collect shortest path cost while traversing the spanning tree. * Every spanning tree is collected by default. The algorithm is responsible * for filtering out entries where the collecting node is not next in the * spanning tree. */ class PrestigeGather{ public: std::map counts; std::map edge_count; void save(graphlab::oarchive& oarc) const { oarc << counts << edge_count; } void load(graphlab::iarchive& iarc) { iarc >> counts >> edge_count; } PrestigeGather& operator+=(const PrestigeGather& gather){ for(std::map::const_iterator iter = this->counts.begin(); iter != this->counts.end(); ++iter ){ long key = iter->first; this->counts[key] += gather.counts.find(key)->second; this->edge_count[key] += gather.edge_count.find(key)->second; } for(std::map::const_iterator iter = gather.counts.begin(); iter != gather.counts.end(); ++iter){ long key = iter->first; if(this->counts.find(key) != this->counts.end()){ this->counts[key] = gather.counts.find(key)->second; this->edge_count[key] = gather.edge_count.find(key)->second; } } return *this; } }; /* * Walk the spanning trees of a randomly selected subset of djikstra trees, * recording the shortest path cost at each node. The sum represents a * sample that is within +/-3% of the real prestige score. Summing the scores * is done during output and must be independently aggregated if the graph is * not outputted. 
*/ class PrestigeAlgorithm : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { bool changed; public: edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::IN_EDGES; } PrestigeGather gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { PrestigeGather g; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key= iter->first; if(edge.target().data().djikstra_pieces[key].id == vertex.id()){ if(edge.source().data().djikstra_pieces[key].launched == true){ g.counts[key] = edge.source().data().djikstra_pieces[key].cost + edge.data(); g.edge_count[key] = 1; } } } return g; } void apply(icontext_type& context, vertex_type& vertex, const PrestigeGather& total) { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(total.edge_count.find(key)->second==0){ vertex.data().djikstra_pieces[key].launched = true; } if((vertex.data().djikstra_pieces[key].launched == true)&& (vertex.data().djikstra_pieces[key].done == false)&& (((long)vertex.data().djikstra_pieces[key].cost)==total.edge_count.find(key)->second)){ vertex.data().djikstra_pieces[key].done = true; vertex.data().djikstra_pieces[key].cost = (double)total.edge_count.find(key)->second; } } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { // if vertex data changes, scatter to all edges. 
bool done = true; for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if(vertex.data().djikstra_pieces.find(key)->second.launched && !vertex.data().djikstra_pieces.find(key)->second.done){ done = false; } } if(!done){ return graphlab::OUT_EDGES; }else{ return graphlab::NO_EDGES; } } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { for(std::map::const_iterator iter = vertex.data().djikstra_pieces.begin(); iter != vertex.data().djikstra_pieces.end(); ++iter){ long key = iter->first; if((vertex.data().djikstra_pieces.find(key)->second.done == false) && (vertex.data().djikstra_pieces.find(key)->second.launched == true)){ context.signal(edge.target()); } } } }; int main (int argc, char** argv){ // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("Prestige Algorithm"); std::string graph_dir; clopts.attach_option("graph", graph_dir, "The graph file. Required "); clopts.add_positional("graph"); clopts.attach_option("samplesize", desired_sample_size, "(Sample size) the number of spanning trees to calculate"); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant prestige score to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } if (graph_dir == "") { dc.cout() << "Graph not specified. 
Cannot continue"; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc); dc.cout() << "Loading graph using line parser" << std::endl; graph.load(graph_dir, line_parser); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; graphlab::omni_engine engine(dc, graph, "asynchronous", clopts); num_vertices = graph.num_vertices(); graphlab::vertex_set start_set = graph.select(selectVertices); engine.signal_vset(start_set); engine.start(); const float runtime = engine.elapsed_seconds(); dc.cout() << "Finished Djikstra engine in " << runtime << " seconds." << std::endl; graphlab::omni_engine engine2(dc,graph,"asynchronous",clopts); engine2.signal_all(); engine2.start(); const float runtime2 = engine.elapsed_seconds(); dc.cout() << "Finished graph reset in " << runtime2 << " seconds." << std::endl; graphlab::omni_engine engine3(dc,graph,"asynchronous",clopts); engine3.signal_vset(start_set); engine3.start(); const float runtime3 = engine.elapsed_seconds(); dc.cout() << "Finished prestige calculations in " << runtime3 << " seconds." 
<< std::endl; if (saveprefix != "") { graph.save(saveprefix, djikstra_writer(), false, // do not gzip true, //save vertices false); // do not save edges } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } ================================================ FILE: toolkits/graph_analytics/CMakeLists.txt ================================================ project(GraphProcessing) add_graphlab_executable(simple_undirected_triangle_count simple_undirected_triangle_count) add_graphlab_executable(undirected_triangle_count undirected_triangle_count.cpp) add_graphlab_executable(directed_triangle_count directed_triangle_count.cpp) add_graphlab_executable(pagerank pagerank.cpp) add_graphlab_executable(kcore kcore.cpp) add_graphlab_executable(format_convert format_convert.cpp) add_graphlab_executable(sssp sssp.cpp) add_graphlab_executable(simple_coloring simple_coloring.cpp) add_graphlab_executable(degree_ordered_coloring degree_ordered_coloring.cpp) add_graphlab_executable(saturation_ordered_coloring saturation_ordered_coloring.cpp) add_graphlab_executable(connected_component connected_component.cpp) add_graphlab_executable(connected_component_stats connected_component_stats.cpp) add_graphlab_executable(approximate_diameter approximate_diameter.cpp) add_graphlab_executable(eigen_vector_normalization eigen_vector_normalization.cpp) add_graphlab_executable(graph_laplacian graph_laplacian.cpp) add_graphlab_executable(partitioning partitioning.cpp) # add_graphlab_executable(warp_pagerank warp_pagerank.cpp) # add_graphlab_executable(warp_pagerank2 warp_pagerank2.cpp) # add_graphlab_executable(warp_coloring warp_coloring.cpp) # add_graphlab_executable(warp_bond_percolation warp_bond_percolation.cpp) # add_graphlab_executable(warp_pagerank_sweeps warp_pagerank_sweeps.cpp) add_graphlab_executable(TSC TSC.cpp) requires_eigen(TSC) ================================================ FILE: toolkits/graph_analytics/TSC.cpp ================================================ #include #include 
#include #include #include #include #include #include #include #include #include int verbose; std::vector Hleft; //#include /* Total Subgraph Centrality. For a graph G with adjacency matrix A, TSC(G) = exp(A)*b, where 1 is the ones vector. We're going to implement this with an Arnoldi solver, following Saad (1992). The algorithm works like this: Choose a maximum iteration m. Then make matrices V and H: b = ones(A.nodecount) V[0] = b/||b|| for j in 0..m: w = A*V[j] for i in 0..j: H[i,j] = (w,V[i]) w = w - H[i,j] * V[i] H[j+1,j] = ||w|| V[j+1] =w/||w|| Then TSC = exp(A)*b ~= (V * exp(H) / ||b||)[:,0]. Stop when successive approximations converge, or we run out of steps. We still have that matrix exponential, but it's small and dense, and there's an implementation in Eigen. */ class node { public: std::vector V; double w; double TSC; double prev; node(): w(0.0),TSC(0.0),prev(0.0){}; void load(graphlab::iarchive& infile) { infile>>V>>w>>TSC>>prev; } void save(graphlab::oarchive& outfile) const { outfile<>weight; } }; typedef node vertex_data_type; typedef edge edge_data_type; typedef graphlab::distributed_graph graph_type; typedef graphlab::warp::warp_engine engine_type; // This is just a little class to be used to find the maximum change in TSC values. 
class max_finder{ public: double data; max_finder& operator+=(const max_finder& other){ if (this->data data = other.data; } return *this; } max_finder(): data(std::numeric_limits::max()) {}; max_finder(double x): data(x) {}; void load(graphlab::iarchive& infile) { infile>>data; } void save(graphlab::oarchive& outfile) const { outfile<graph.num_vertices()) { m = graph.num_vertices(); } engine_type engine(dc,graph,clopts); engine.signal_all(); Eigen::MatrixXd H = Eigen::MatrixXd::Zero(m+1,m+1); Hleft.resize(m); for(int i=0;i=0) { if (column>graph.num_vertices()){ column = 0; } graph.transform_vertices(boost::bind(initialize_column,_1,column,m)); beta = 1.0; } else { graph.transform_vertices(boost::bind(initialize_TSC,_1,m)); beta = sqrt(m); } // The first column of V is just w graph.transform_vertices(w_to_v); for(int j=0;j(boost::bind(w_dot_V,_1,i)); graph.transform_vertices(boost::bind(w_minus_hdot,_1,i,H(i,j))); } H(j+1,j)= sqrt(graph.map_reduce_vertices(sum_w)); // If we're in this case, it means we have a spanning set and we shouldn't // prep for the next iteration. Otherwise, make the data we'll need next time. if ( ( !std::isnan(H(j+1,j))) && (H(j+1,j) > 0) ) { graph.transform_vertices(boost::bind(scale_w, _1, H(j+1,j))); graph.transform_vertices(w_to_v); } if (j>0) { // Have we converged? 
Eigen::MatrixXd EH(H); EH = EH.exp(); Hleft.resize(j+1); for(int i=0;i<=j;i++) { Hleft[i] = EH(i,0) * beta;} graph.transform_vertices(accumulate_hleft); if (j>1) { max_finder largest_error= graph.map_reduce_vertices(max_error); double all_error = graph.map_reduce_vertices(total_error); if (verbose){ std::cerr<<"ARNOLDI STEP FINISHED "< #include #include #include #include #include #include #include #include //helper function float myrand() { return static_cast(rand() / (RAND_MAX + 1.0)); } //helper function to return a hash value for Flajolet & Martin bitmask size_t hash_value() { size_t ret = 0; while (myrand() < 0.5) { ret++; } return ret; } const size_t DUPULICATION_OF_BITMASKS = 10; struct vdata { //use two bitmasks for consistency std::vector > bitmask1; std::vector > bitmask2; //indicate which is the bitmask for reading (or writing) bool odd_iteration; vdata() : bitmask1(), bitmask2(), odd_iteration(true) { } //for exact counting (but needs large memory) void create_bitmask(size_t id) { std::vector mask1(id + 2, 0); mask1[id] = 1; bitmask1.push_back(mask1); std::vector mask2(id + 2, 0); mask2[id] = 1; bitmask2.push_back(mask2); } //for approximate Flajolet & Martin counting void create_hashed_bitmask(size_t id) { for (size_t i = 0; i < DUPULICATION_OF_BITMASKS; ++i) { size_t hash_val = hash_value(); std::vector mask1(hash_val + 2, 0); mask1[hash_val] = 1; bitmask1.push_back(mask1); std::vector mask2(hash_val + 2, 0); mask2[hash_val] = 1; bitmask2.push_back(mask2); } } void save(graphlab::oarchive& oarc) const { size_t num = bitmask1.size(); oarc << num; for (size_t a = 0; a < num; ++a) { size_t size = bitmask1[a].size(); oarc << size; for (size_t i = 0; i < size; ++i) oarc << (bool)bitmask1[a][i]; for (size_t i = 0; i < size; ++i) oarc << (bool)bitmask2[a][i]; } oarc << odd_iteration; } void load(graphlab::iarchive& iarc) { bitmask1.clear(); bitmask2.clear(); size_t num = 0; iarc >> num; for (size_t a = 0; a < num; ++a) { size_t size = 0; iarc >> size; 
std::vector mask1; for (size_t i = 0; i < size; ++i) { bool element = true; iarc >> element; mask1.push_back(element); } bitmask1.push_back(mask1); std::vector mask2; for (size_t i = 0; i < size; ++i) { bool element = true; iarc >> element; mask2.push_back(element); } bitmask2.push_back(mask2); } iarc >> odd_iteration; } }; typedef graphlab::distributed_graph graph_type; //initialize bitmask void initialize_vertex(graph_type::vertex_type& v) { v.data().create_bitmask(v.id()); } //initialize bitmask void initialize_vertex_with_hash(graph_type::vertex_type& v) { v.data().create_hashed_bitmask(v.id()); } //helper function to compute bitwise-or void bitwise_or(std::vector >& v1, const std::vector >& v2) { for (size_t a = 0; a < v1.size(); ++a) { while (v1[a].size() < v2[a].size()) { v1[a].push_back(false); } for (size_t i = 0; i < v2[a].size(); ++i) { v1[a][i] = v1[a][i] || v2[a][i]; } } } struct bitmask_gatherer { std::vector > bitmask; bitmask_gatherer() : bitmask() { } explicit bitmask_gatherer(const std::vector > & in_b) : bitmask(){ for(size_t i=0;i> num; for (size_t a = 0; a < num; ++a) { size_t size = 0; iarc >> size; std::vector mask1; for (size_t i = 0; i < size; ++i) { bool element = true; iarc >> element; mask1.push_back(element); } bitmask.push_back(mask1); } } }; //The next bitmask b(h + 1; i) of i at the hop h + 1 is given as: //b(h + 1; i) = b(h; i) BITWISE-OR {b(h; k) | source = i & target = k}. 
class one_hop: public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: //gather on out edges edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::OUT_EDGES; } //for each edge gather the bitmask of the edge bitmask_gatherer gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { if (vertex.data().odd_iteration) { return bitmask_gatherer(edge.target().data().bitmask2); } else { return bitmask_gatherer(edge.target().data().bitmask1); } } //get bitwise-ORed bitmask and switch bitmasks void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { if (vertex.data().odd_iteration) { if (total.bitmask.size() > 0) bitwise_or(vertex.data().bitmask1, total.bitmask); vertex.data().odd_iteration = false; } else { if (total.bitmask.size() > 0) bitwise_or(vertex.data().bitmask2, total.bitmask); vertex.data().odd_iteration = true; } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { } }; //copy the updated bitmask to the other void copy_bitmasks(graph_type::vertex_type& vdata) { if (vdata.data().odd_iteration == false) { //odd_iteration has just finished vdata.data().bitmask2 = vdata.data().bitmask1; } else { vdata.data().bitmask1 = vdata.data().bitmask2; } } //count the number of vertices reached in the current hop size_t absolute_vertex_data(const graph_type::vertex_type& vertex) { size_t count = 0; for (size_t i = 0; i < vertex.data().bitmask1[0].size(); ++i) if (vertex.data().bitmask1[0][i]) count++; return count; } //count the number of vertices reached in the current hop with Flajolet & Martin counting method size_t approximate_pair_number(std::vector > bitmask) { float sum = 0.0; for (size_t a = 0; a < bitmask.size(); ++a) { for (size_t i = 0; i < bitmask[a].size(); ++i) { if (bitmask[a][i] == 0) { sum += 
(float) i; break; } } } return (size_t) (pow(2.0, sum / (float) (bitmask.size())) / 0.77351); } //count the number of notes reached in the current hop size_t absolute_vertex_data_with_hash( const graph_type::vertex_type& vertex) { size_t count = approximate_pair_number(vertex.data().bitmask1); return count; } int main(int argc, char** argv) { std::cout << "Approximate graph diameter\n\n"; graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; std::string datafile; float termination_criteria = 0.0001; //parse command line graphlab::command_line_options clopts( "Approximate graph diameter. " "Directions of edges are considered."); std::string graph_dir; std::string format = "adj"; bool use_sketch = true; std::string exec_type = "synchronous"; clopts.attach_option("graph", graph_dir, "The graph file. This is not optional"); clopts.add_positional("graph"); clopts.attach_option("format", format, "The graph file format"); clopts.attach_option("tol", termination_criteria, "The permissible change at convergence."); clopts.attach_option("use-sketch", use_sketch, "If true, will use Flajolet & Martin bitmask, " "which is more compact and faster."); if (!clopts.parse(argc, argv)){ dc.cout() << "Error in parsing command line arguments." 
<< std::endl; return EXIT_FAILURE; } if (graph_dir == "") { std::cout << "--graph is not optional\n"; return EXIT_FAILURE; } //load graph graph_type graph(dc, clopts); dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); graph.finalize(); time_t start, end; //initialize vertices time(&start); if (use_sketch == false) graph.transform_vertices(initialize_vertex); else graph.transform_vertices(initialize_vertex_with_hash); graphlab::omni_engine engine(dc, graph, exec_type, clopts); //main iteration size_t previous_count = 0; size_t diameter = 0; for (size_t iter = 0; iter < 100; ++iter) { engine.signal_all(); engine.start(); graph.transform_vertices(copy_bitmasks); size_t current_count = 0; if (use_sketch == false) current_count = graph.map_reduce_vertices(absolute_vertex_data); else current_count = graph.map_reduce_vertices( absolute_vertex_data_with_hash); dc.cout() << iter + 1 << "-th hop: " << current_count << " vertex pairs are reached\n"; if (iter > 0 && (float) current_count < (float) previous_count * (1.0 + termination_criteria)) { diameter = iter; dc.cout() << "converge\n"; break; } previous_count = current_count; } time(&end); dc.cout() << "graph calculation time is " << (end - start) << " sec\n"; dc.cout() << "The approximate diameter is " << diameter << "\n"; graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } ================================================ FILE: toolkits/graph_analytics/connected_component.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include #include struct vdata { uint64_t labelid; vdata() : labelid(0) { } void save(graphlab::oarchive& oarc) const { oarc << labelid; } void load(graphlab::iarchive& iarc) { iarc >> labelid; } }; typedef graphlab::distributed_graph graph_type; //set label id at vertex id void initialize_vertex(graph_type::vertex_type& v) { v.data().labelid = v.id(); } //message where summation means minimum struct min_message { uint64_t value; explicit min_message(uint64_t v) : value(v) { } min_message() : value(std::numeric_limits::max()) { } min_message& operator+=(const min_message& other) { value = std::min(value, other.value); return *this; } void save(graphlab::oarchive& oarc) const { oarc << value; } void load(graphlab::iarchive& iarc) { iarc >> value; } }; class label_propagation: public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { private: size_t recieved_labelid; bool perform_scatter; public: label_propagation() { recieved_labelid = std::numeric_limits::max(); perform_scatter = false; } //receive messages void init(icontext_type& context, const vertex_type& vertex, const message_type& msg) { recieved_labelid = msg.value; } //do not gather edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } size_t gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return 0; } //update label id. 
If updated, scatter messages void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { if (recieved_labelid == std::numeric_limits::max()) { perform_scatter = true; } else if (vertex.data().labelid > recieved_labelid) { perform_scatter = true; vertex.data().labelid = recieved_labelid; } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { if (perform_scatter) return graphlab::ALL_EDGES; else return graphlab::NO_EDGES; } //If a neighbor vertex has a bigger label id, send a massage void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { if (edge.source().id() != vertex.id() && edge.source().data().labelid > vertex.data().labelid) { context.signal(edge.source(), min_message(vertex.data().labelid)); } if (edge.target().id() != vertex.id() && edge.target().data().labelid > vertex.data().labelid) { context.signal(edge.target(), min_message(vertex.data().labelid)); } } }; class graph_writer { public: std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "," << v.data().labelid << "\n"; return strm.str(); } std::string save_edge(graph_type::edge_type e) { return ""; } }; int main(int argc, char** argv) { std::cout << "Connected Component\n\n"; graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_DEBUG); //parse options graphlab::command_line_options clopts("Connected Component."); std::string graph_dir; std::string saveprefix; std::string format = "adj"; std::string exec_type = "synchronous"; clopts.attach_option("graph", graph_dir, "The graph file. 
This is not optional"); clopts.add_positional("graph"); clopts.attach_option("format", format, "The graph file format"); clopts.attach_option("saveprefix", saveprefix, "If set, will save the pairs of a vertex id and " "a component id to a sequence of files with prefix " "saveprefix"); if (!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } if (graph_dir == "") { std::cout << "--graph is not optional\n"; return EXIT_FAILURE; } graph_type graph(dc, clopts); //load graph dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); graphlab::timer ti; graph.finalize(); dc.cout() << "Finalization in " << ti.current_time() << std::endl; graph.transform_vertices(initialize_vertex); //running the engine time_t start, end; graphlab::omni_engine engine(dc, graph, exec_type, clopts); engine.signal_all(); time(&start); engine.start(); //write results if (saveprefix.size() > 0) { graph.save(saveprefix, graph_writer(), false, //set to true if each output file is to be gzipped true, //whether vertices are saved false); //whether edges are saved } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } ================================================ FILE: toolkits/graph_analytics/connected_component_stats.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include #include struct vdata { std::vector vids; void save(graphlab::oarchive& oarc) const { oarc << vids; } void load(graphlab::iarchive& iarc) { iarc >> vids; } }; typedef graphlab::distributed_graph graph_type; void vertex_combine(vdata& a, const vdata& b) { for (size_t i = 0;i < b.vids.size(); ++i) a.vids.push_back(b.vids[i]); } bool ccoutput_parser(graph_type& graph, const std::string& filename, const std::string& textline) { size_t split = textline.find_first_of(","); if (split == std::string::npos) return true; else { std::string t = textline; t[split] = 0; vdata data; data.vids.push_back(atol(t.c_str())); graph.add_vertex(atol(t.c_str() + split + 1), data); return true; } } struct size_counter { // a map from size to count boost::unordered_map counts; size_counter() { } explicit size_counter(size_t size) { counts[size] = 1; } size_counter& operator+=(const size_counter& other) { boost::unordered_map::const_iterator iter = other.counts.begin(); while(iter != other.counts.end()) { counts[iter->first] += iter->second; ++iter; } return *this; } void save(graphlab::oarchive& oarc) const { oarc << counts; } void load(graphlab::iarchive& iarc) { iarc >> counts; } }; size_counter absolute_vertex_data(const graph_type::vertex_type& vertex) { return size_counter(vertex.data().vids.size()); } class graph_writer { public: std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << ":"; for (size_t i = 0;i < v.data().vids.size(); ++i) { strm << v.data().vids[i] << " "; } strm << "\n"; return strm.str(); } std::string save_edge(graph_type::edge_type e) { return ""; } }; int main(int argc, char** argv) { std::cout << "Connected Component\n\n"; graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; //parse options graphlab::command_line_options clopts("Connected Component 
Stats."); std::string graph_dir; std::string saveprefix; std::string format = "adj"; clopts.attach_option("graph", graph_dir, "The graph file. This is not optional"); clopts.add_positional("graph"); clopts.attach_option("format", format, "The graph file format"); clopts.attach_option("saveprefix", saveprefix, "save location"); if (!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } if (graph_dir == "") { std::cout << "--graph is not optional\n"; return EXIT_FAILURE; } graph_type graph(dc, clopts); //load graph dc.cout() << "Loading graph in format: "<< format << std::endl; graphlab::timer ti; graph.set_duplicate_vertex_strategy(vertex_combine); graph.load(graph_dir, ccoutput_parser); graph.finalize(); dc.cout() << "Complete Finalization in " << ti.current_time() << std::endl; ti.start(); //take statistics size_counter stat = graph.map_reduce_vertices( absolute_vertex_data); dc.cout() << "graph calculation time is " << ti.current_time() << " sec\n"; dc.cout() << "RESULT:\nsize\tcount\n"; for (boost::unordered_map::const_iterator iter = stat.counts.begin(); iter != stat.counts.end(); iter++) { dc.cout() << iter->first << "\t" << iter->second << "\n"; } //write results if (saveprefix.size() > 0) { graph.save(saveprefix, graph_writer(), false, //set to true if each output file is to be gzipped true, //whether vertices are saved false); //whether edges are saved } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } ================================================ FILE: toolkits/graph_analytics/degree_ordered_coloring.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /* * Graph coloring algorithm, such that vertex programs are scheduled in * order of vertex degree. Includes trade-off featre for determining the * fraction of the graph to conduct ordered execution over random execution, * allowing user to specify run time - colour quality trade-off */ #include #include #include #include #include /* for std::abs(double) */ typedef graphlab::vertex_id_type color_type; /* * Vertex data: color and degree of node */ typedef struct { int color; int degree; // serialize void save(graphlab::oarchive& oarc) const { oarc << color << degree; } // deserialize void load(graphlab::iarchive& iarc) { iarc >> color >> degree; } } vertex_data_type; #define UNCOLORED -1 /* * no edge data */ typedef graphlab::empty edge_data_type; bool EDGE_CONSISTENT = false; bool TRADE = false; size_t graph_size = 0; size_t fraction = 0; int max_degree = 0; int low_degree = INT_MAX; signed int current_degree; size_t already_signalled = 0; std::set used_colors; std::set degrees; /* * This is the gathering type which accumulates an (unordered) set of * all neighboring colors * It is a simple wrapper around a boost::unordered_set with * an operator+= which simply performs a set union. * * This struct can be significantly accelerated for small sets. * Small collections of vertex IDs should not require the overhead * of the unordered_set. */ struct set_union_gather { boost::unordered_set colors; /* * Combining with another collection of vertices. * Union it into the current set. 
*/ set_union_gather& operator+=(const set_union_gather& other) { foreach(graphlab::vertex_id_type othervid, other.colors) { colors.insert(othervid); } return *this; } // serialize void save(graphlab::oarchive& oarc) const { oarc << colors; } // deserialize void load(graphlab::iarchive& iarc) { iarc >> colors; } }; /* * Define the type of the graph */ typedef graphlab::distributed_graph graph_type; /* * On gather, we accumulate a set of all adjacent colors. */ class graph_coloring: public graphlab::ivertex_program, /* I have no data. Just force it to POD */ public graphlab::IS_POD_TYPE { public: // Gather on all edges edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } /* * For each edge, figure out the ID of the "other" vertex * and accumulate a set of the neighborhood vertex IDs. */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { set_union_gather gather; color_type other_color = edge.source().id() == vertex.id() ? edge.target().data().color: edge.source().data().color; gather.colors.insert(other_color); return gather; } /* * the gather result now contains the colors in the neighborhood. * pick a different color and store it */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& neighborhood) { // find the smallest color not described in the neighborhood size_t neighborhoodsize = neighborhood.colors.size(); for (color_type curcolor = 0; curcolor < neighborhoodsize + 1; ++curcolor) { if (neighborhood.colors.count(curcolor) == 0) { used_colors.insert(curcolor); vertex.data().color = curcolor; break; } } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { if (EDGE_CONSISTENT) return graphlab::NO_EDGES; else return graphlab::ALL_EDGES; } /* * For each edge, count the intersection of the neighborhood of the * adjacent vertices. This is the number of triangles this edge is involved * in. 
*/ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { // both points have different colors! if (edge.source().data().color == edge.target().data().color) { context.signal(edge.source().id() == vertex.id() ? edge.target() : edge.source()); } } }; void initialize_vertex_values(graph_type::vertex_type& v) { v.data().degree = v.num_out_edges(); degrees.insert(v.data().degree); v.data().color = UNCOLORED; if (v.data().degree > max_degree) max_degree = v.data().degree; if (v.data().degree < low_degree) low_degree = v.data().degree; } /* * A saver which saves a file where each line is a vid / color pair */ struct save_colors{ std::string save_vertex(graph_type::vertex_type v) { return graphlab::tostr(v.id()) + "\t" + graphlab::tostr(v.data().color) + "\n"; } std::string save_edge(graph_type::edge_type e) { return ""; } }; typedef graphlab::async_consistent_engine engine_type; graphlab::empty signal_vertices_at_degree(engine_type::icontext_type& ctx, const graph_type::vertex_type& vertex) { if (vertex.data().degree == current_degree) { already_signalled++; ctx.signal(vertex); } return graphlab::empty(); } graphlab::empty signal_uncolored(engine_type::icontext_type& ctx, const graph_type::vertex_type& vertex) { if (vertex.data().color == UNCOLORED) { ctx.signal(vertex); } return graphlab::empty(); } struct max_deg_vertex_reducer: public graphlab::IS_POD_TYPE { size_t degree; graphlab::vertex_id_type vid; max_deg_vertex_reducer& operator+=(const max_deg_vertex_reducer& other) { if (degree < other.degree) { (*this) = other; } return (*this); } }; max_deg_vertex_reducer find_max_deg_vertex(const graph_type::vertex_type vtx) { max_deg_vertex_reducer red; red.degree = vtx.num_out_edges(); red.vid = vtx.id(); return red; } /**************************************************************************/ /* */ /* Validation Functions */ /* */ /**************************************************************************/ size_t 
validate_conflict(graph_type::edge_type& edge) { return edge.source().data().color == edge.target().data().color; } inline bool isEqual(double x, double y) { const double epsilon = 1e-5; return std::abs(x - y) <= epsilon * std::abs(x); } int main(int argc, char** argv) { //global_logger().set_log_level(LOG_INFO); // Initialize control plane using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "This program computes a simple graph coloring of a" "provided graph.\n\n"; graphlab::command_line_options clopts("Graph coloring. " "Given a graph, this program computes a graph coloring of the graph." "The Asynchronous engine is used."); std::string prefix, format; std::string output; float alpha = 2.1; size_t powerlaw = 0; double trade = 0; clopts.attach_option("graph", prefix, "Graph input. reads all graphs matching prefix*"); clopts.attach_option("format", format, "The graph format"); clopts.attach_option("output", output, "A prefix to save the output."); clopts.attach_option("powerlaw", powerlaw, "Generate a synthetic powerlaw out-degree graph. "); clopts.attach_option("alpha", alpha, "Alpha in powerlaw distrubution"); clopts.attach_option("trade", trade, "Execute tradeoff version. Probability of degree execution for node (0.0 to 1.0)"); clopts.attach_option("edgescope", EDGE_CONSISTENT, "Use Locking. "); if(!clopts.parse(argc, argv)) return EXIT_FAILURE; if (prefix.length() == 0 && powerlaw == 0) { clopts.print_description(); return EXIT_FAILURE; } if (output == "") { dc.cout() << "Warning! Output will not be saved\n"; } graphlab::launch_metric_server(); // load graph graph_type graph(dc, clopts); if (!isEqual(0.0, trade)) { TRADE = true; } if(powerlaw > 0) { // make a synthetic graph dc.cout() << "Loading synthetic Powerlaw graph." 
<< std::endl; graph.load_synthetic_powerlaw(powerlaw, false, alpha, 100000000); } else { // Load the graph from a file if (prefix == "") { dc.cout() << "--graph is not optional\n"; return EXIT_FAILURE; } else if (format == "") { dc.cout() << "--format is not optional\n"; return EXIT_FAILURE; } graph.load_format(prefix, format); } graph.finalize(); dc.cout() << "Number of vertices: " << graph.num_vertices() << std::endl << "Number of edges: " << graph.num_edges() << std::endl; graphlab::timer ti; dc.cout() << "Initialising vertex data..." < engine(dc, graph, clopts); //Tradeoff between ordered and random vertex execution if (TRADE) { graph_size = graph.num_vertices(); fraction = (int) graph_size * trade; dc.cout() << "Degree ordered coloring for " << fraction << " in " << graph_size << " vertices." << std::endl; } for (int x = max_degree; x >= low_degree; x--){ if (degrees.find(x) != degrees.end()) { current_degree = x; engine.map_reduce_vertices(signal_vertices_at_degree); if (TRADE) { //Already signalled vertices for degree ordered execution if(already_signalled >= fraction) { engine.start(); //Signal remaining vertices randomly engine.map_reduce_vertices(signal_uncolored); engine.start(); break; } } } } if (!TRADE) { engine.start(); } size_t conflict_count = graph.map_reduce_edges(validate_conflict); if (conflict_count > 0) { dc.cout() << "Still uncolored, finalising..." 
<< std::endl; engine.map_reduce_vertices(signal_uncolored); engine.start(); conflict_count = graph.map_reduce_edges(validate_conflict); } dc.cout() << "Colored in " << ti.current_time() << " seconds" << std::endl; dc.cout() << "Colored using " << used_colors.size() << " colors" << std::endl; dc.cout() << "Num conflicts = " << conflict_count << "\n"; if (output != "") { graph.save(output, save_colors(), false, /* no compression */ true, /* save vertex */ false, /* do not save edge */ 1); /* one file per machine */ } graphlab::stop_metric_server(); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main ================================================ FILE: toolkits/graph_analytics/directed_triangle_count.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include /** This implements the exact counting procedure described in Efficient Algorithms for Large-Scale Local Triangle Counting Luca Becchetti, Paolo Boldi, Carlos Castillo, Aristides Gioni */ // Radix sort implementation from https://github.com/gorset/radix // Thanks to Erik Gorset // /* Copyright 2011 Erik Gorset. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY Erik Gorset ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Erik Gorset OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. The views and conclusions contained in the software and documentation are those of the authors and should not be interpreted as representing official policies, either expressed or implied, of Erik Gorset. 
*/ void radix_sort(graphlab::vertex_id_type *array, int offset, int end, int shift) { int x, y; graphlab::vertex_id_type value, temp; int last[256] = { 0 }, pointer[256]; for (x=offset; x> shift) & 0xFF]; } last[0] += offset; pointer[0] = offset; for (x=1; x<256; ++x) { pointer[x] = last[x-1]; last[x] += last[x-1]; } for (x=0; x<256; ++x) { while (pointer[x] != last[x]) { value = array[pointer[x]]; y = (value >> shift) & 0xFF; while (x != y) { temp = array[pointer[y]]; array[pointer[y]++] = value; value = temp; y = (value >> shift) & 0xFF; } array[pointer[x]++] = value; } } if (shift > 0) { shift -= 8; for (x=0; x<256; ++x) { temp = x > 0 ? pointer[x] - pointer[x-1] : pointer[0] - offset; if (temp > 64) { radix_sort(array, pointer[x] - temp, pointer[x], shift); } else if (temp > 1) { std::sort(array + (pointer[x] - temp), array + pointer[x]); //insertion_sort(array, pointer[x] - temp, pointer[x]); } } } } size_t HASH_THRESHOLD = 64; // We on each vertex, either a vector of sorted VIDs // or a hash set (cuckoo hash) of VIDs. // If the number of elements is greater than HASH_THRESHOLD, // the hash set is used. Otherwise the vector is used. struct vid_vector{ std::vector vid_vec; graphlab::cuckoo_set_pow2 *cset; vid_vector(): cset(NULL) { } vid_vector(const vid_vector& v):cset(NULL) { (*this) = v; } vid_vector& operator=(const vid_vector& v) { if (this == &v) return *this; vid_vec = v.vid_vec; if (v.cset != NULL) { // allocate the cuckoo set if the other side is using a cuckoo set // or clear if I alrady have one if (cset == NULL) { cset = new graphlab::cuckoo_set_pow2(-1, 0, 2 * v.cset->size()); } else { cset->clear(); } (*cset) = *(v.cset); } else { // if the other side is not using a cuckoo set, lets not use a cuckoo set // either if (cset != NULL) { delete cset; cset = NULL; } } return *this; } ~vid_vector() { if (cset != NULL) delete cset; } // assigns a vector of vertex IDs to this storage. 
// this function will clear the contents of the vid_vector // and reconstruct it. // If the assigned values has length >= HASH_THRESHOLD, // we will allocate a cuckoo set to store it. Otherwise, // we just store a sorted vector void assign(const std::vector& vec) { clear(); if (vec.size() >= HASH_THRESHOLD) { // move to cset cset = new graphlab::cuckoo_set_pow2(-1, 0, 2 * vec.size()); foreach (graphlab::vertex_id_type v, vec) { cset->insert(v); } } else { vid_vec = vec; if (vid_vec.size() > 64) { radix_sort(&(vid_vec[0]), 0, vid_vec.size(), 24); } else { std::sort(vid_vec.begin(), vid_vec.end()); } std::vector::iterator new_end = std::unique(vid_vec.begin(), vid_vec.end()); vid_vec.erase(new_end, vid_vec.end()); } } void save(graphlab::oarchive& oarc) const { oarc << (cset != NULL); if (cset == NULL) oarc << vid_vec; else oarc << (*cset); } void clear() { vid_vec.clear(); if (cset != NULL) { delete cset; cset = NULL; } } size_t size() const { return cset == NULL ? vid_vec.size() : cset->size(); } void load(graphlab::iarchive& iarc) { clear(); bool hascset; iarc >> hascset; if (!hascset) iarc >> vid_vec; else { cset = new graphlab::cuckoo_set_pow2(-1, 0, 2); iarc >> (*cset); } } }; /* A simple counting iterator which can be used as an insert iterator. but only counts the number of elements inserted. 
Useful for use with counting the size of an intersection using std::set_intersection */ template struct counting_inserter { size_t* i; counting_inserter(size_t* i):i(i) { } counting_inserter& operator++() { ++(*i); return *this; } void operator++(int) { ++(*i); } struct empty_val { empty_val operator=(const T&) { return empty_val(); } }; empty_val operator*() { return empty_val(); } typedef empty_val reference; }; /* * Computes the size of the intersection of two vid_vector's */ static uint32_t count_set_intersect( const vid_vector& smaller_set, const vid_vector& larger_set) { if (smaller_set.size() > larger_set.size()) { return count_set_intersect(larger_set, smaller_set); } if (smaller_set.cset == NULL && larger_set.cset == NULL) { size_t i = 0; counting_inserter iter(&i); std::set_intersection(smaller_set.vid_vec.begin(), smaller_set.vid_vec.end(), larger_set.vid_vec.begin(), larger_set.vid_vec.end(), iter); return i; } else if (smaller_set.cset == NULL && larger_set.cset != NULL) { size_t i = 0; foreach(graphlab::vertex_id_type vid, smaller_set.vid_vec) { i += larger_set.cset->count(vid); } return i; } else if (smaller_set.cset != NULL && larger_set.cset == NULL) { size_t i = 0; foreach(graphlab::vertex_id_type vid, larger_set.vid_vec) { i += smaller_set.cset->count(vid); } return i; } else { size_t i = 0; foreach(graphlab::vertex_id_type vid, *(smaller_set.cset)) { i += larger_set.cset->count(vid); } return i; } } // This structure is used to hold the final triangle counts // on each vertex struct triangle_count: public graphlab::IS_POD_TYPE { triangle_count(): out_triangles(0), in_triangles(0), through_triangles(0), cycle_triangles(0) { } // A is the example below /* A ---> B | / | / v / C diagonal edge direction does not matter */ uint32_t out_triangles; /* A<--- B ^ / | / | / C diagonal edge direction does not matter */ uint32_t in_triangles; /* A---> B ^ ^ | / | / C */ uint32_t through_triangles; /* A---> B ^ / | / | v C */ uint32_t cycle_triangles; 
triangle_count& operator+=(const triangle_count& other) { out_triangles += other.out_triangles; in_triangles += other.in_triangles; through_triangles += other.through_triangles; cycle_triangles += other.cycle_triangles; return *this; } }; /* * Each vertex maintains a list of all its neighbors. * and a final count for the number of triangles it is involved in */ struct vertex_data_type { vertex_data_type(){ } // A list of all its neighbors vid_vector in_vid_set; vid_vector out_vid_set; triangle_count count; // The number of triangles this vertex is involved it. // only used if "per vertex counting" is used void save(graphlab::oarchive &oarc) const { oarc << in_vid_set << out_vid_set << count; } void load(graphlab::iarchive &iarc) { iarc >> in_vid_set >> out_vid_set >> count; } }; // This structure is used to hold the final triangle counts // on each edge struct edge_triangle_count: public graphlab::IS_POD_TYPE { edge_triangle_count(): s_s(0), st_st(0), st_s(0) { } // using notation from the paper // s_s is the intersection between outgoing of source and outgoing of target // st_st is intersection between incoming of source and incoming of target // st_s is intersectino between incoming of source and outgoing of target uint32_t s_s, st_st, st_s; edge_triangle_count & operator+=(const edge_triangle_count& other) { s_s += other.s_s; st_st += other.st_st; st_s += other.st_s; return *this; } }; /* * Each edge is simply a counter of triangles */ typedef edge_triangle_count edge_data_type; /* * This is the gathering type which accumulates an array of * all neighboring vertices. * It is a simple wrapper around a vector with * an operator+= which simply performs a += */ struct set_union_gather { graphlab::vertex_id_type v; std::vector vid_vec; set_union_gather():v(-1) { } size_t size() const { if (v == (graphlab::vertex_id_type)-1) return vid_vec.size(); else return 1; } /* * Combining with another collection of vertices. * Union it into the current set. 
*/ set_union_gather& operator+=(const set_union_gather& other) { if (size() == 0) { (*this) = other; return (*this); } else if (other.size() == 0) { return *this; } if (vid_vec.size() == 0) { vid_vec.push_back(v); v = (graphlab::vertex_id_type)(-1); } if (other.vid_vec.size() > 0) { size_t ct = vid_vec.size(); vid_vec.resize(vid_vec.size() + other.vid_vec.size()); for (size_t i = 0; i < other.vid_vec.size(); ++i) { vid_vec[ct + i] = other.vid_vec[i]; } } else if (other.v != (graphlab::vertex_id_type)-1) { vid_vec.push_back(other.v); } return *this; } // serialize void save(graphlab::oarchive& oarc) const { oarc << bool(vid_vec.size() == 0); if (vid_vec.size() == 0) oarc << v; else oarc << vid_vec; } // deserialize void load(graphlab::iarchive& iarc) { bool novvec; v = (graphlab::vertex_id_type)(-1); vid_vec.clear(); iarc >> novvec; if (novvec) iarc >> v; else iarc >> vid_vec; } }; struct set_union_pair{ set_union_gather in_set; set_union_gather out_set; set_union_pair& operator+=(const set_union_pair& other) { in_set += other.in_set; out_set += other.out_set; return (*this); } void save(graphlab::oarchive& oarc) const { oarc << in_set << out_set; } // deserialize void load(graphlab::iarchive& iarc) { iarc >> in_set >> out_set; } }; /* * Define the type of the graph */ typedef graphlab::distributed_graph graph_type; /* * This class implements the triangle counting algorithm as described in * the header. On gather, we accumulate a set of all adjacent vertices. * If per_vertex output is not necessary, we can use the optimization * where each vertex only accumulates neighbors with greater vertex IDs. */ class triangle_count_program : public graphlab::ivertex_program, /* I have no data. 
         Just force it to POD */
      public graphlab::IS_POD_TYPE {
public:
  // set by apply(); when true, scatter_edges() suppresses the scatter phase
  bool do_not_scatter;

  // Gather on all edges
  edge_dir_type gather_edges(icontext_type& context,
                             const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  }

  /*
   * For each edge, figure out the ID of the "other" vertex
   * and accumulate a set of the neighborhood vertex IDs.
   */
  gather_type gather(icontext_type& context, const vertex_type& vertex,
                     edge_type& edge) const {
    set_union_pair gather;
    // check the edge direction
    if (edge.source().id() == vertex.id()) {
      // this is an out_edge
      graphlab::vertex_id_type otherid = edge.target().id();
      gather.out_set.v = otherid;
    } else {
      // this is an in_edge
      graphlab::vertex_id_type otherid = edge.source().id();
      gather.in_set.v = otherid;
    }
    return gather;
  }

  /*
   * the gather result now contains the vertex IDs in the neighborhood.
   * store it on the vertex.
   */
  void apply(icontext_type& context, vertex_type& vertex,
             const gather_type& neighborhood) {
    do_not_scatter = false;
    if (neighborhood.in_set.vid_vec.size() == 0) {
      // neighborhood set may be empty or has only 1 element
      // (the single element lives in in_set.v; -1 marks "empty")
      vertex.data().in_vid_set.clear();
      if (neighborhood.in_set.v != (graphlab::vertex_id_type(-1))) {
        vertex.data().in_vid_set.vid_vec.push_back(neighborhood.in_set.v);
      }
    } else {
      vertex.data().in_vid_set.assign(neighborhood.in_set.vid_vec);
    }
    if (neighborhood.out_set.vid_vec.size() == 0) {
      // neighborhood set may be empty or has only 1 element
      vertex.data().out_vid_set.clear();
      if (neighborhood.out_set.v != (graphlab::vertex_id_type(-1))) {
        vertex.data().out_vid_set.vid_vec.push_back(neighborhood.out_set.v);
      }
    } else {
      vertex.data().out_vid_set.assign(neighborhood.out_set.vid_vec);
    }
    // an isolated vertex (no in- and no out-neighbors) contributes to no
    // triangles, so its scatter can be skipped entirely
    do_not_scatter = vertex.data().in_vid_set.size() == 0 &&
                     vertex.data().out_vid_set.size() == 0 ;
  } // end of apply

  /*
   * Scatter over all edges to compute the intersection.
   * I only need to touch each edge once, so if I scatter just on the
   * out edges, that is sufficient.
*/ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { if (do_not_scatter) return graphlab::NO_EDGES; else return graphlab::OUT_EDGES; } /* * For each edge, count the intersection of the neighborhood of the * adjacent vertices. This is the number of triangles this edge is involved * in. */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { //vertex_type othervtx = edge.target(); // ok. the real work happens here. const vertex_data_type& srclist = edge.source().data(); const vertex_data_type& targetlist = edge.target().data(); edge.data().s_s = count_set_intersect(srclist.out_vid_set, targetlist.out_vid_set); edge.data().st_st += count_set_intersect(srclist.in_vid_set, targetlist.in_vid_set); edge.data().st_s += count_set_intersect(srclist.in_vid_set, targetlist.out_vid_set); } }; /* * This class is used in a second engine call if per vertex counts are needed. * The number of triangles a vertex is involved in can be computed easily * by summing over the number of triangles each adjacent edge is involved in * and dividing by 2. */ class get_per_vertex_count : public graphlab::ivertex_program, /* I have no data. Just force it to POD */ public graphlab::IS_POD_TYPE { public: // Gather on all edges edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } // We gather the number of triangles each edge is involved in triangle_count gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { triangle_count ret; if (edge.source().id() == vertex.id()) { ret.out_triangles += edge.data().s_s; ret.through_triangles += edge.data().st_st; ret.cycle_triangles += edge.data().st_s; } else { ret.in_triangles += edge.data().st_st; } return ret; } /* the gather result is the total sum of the number of triangles * each adjacent edge is involved in . Dividing by 2 gives the * desired result. 
*/ void apply(icontext_type& context, vertex_type& vertex, const gather_type& tc) { vertex.data().in_vid_set.clear(); vertex.data().out_vid_set.clear(); vertex.data().count = tc; } // No scatter edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; typedef graphlab::synchronous_engine engine_type; /* Used to sum over all the vertices in the graph in a * map_reduce_vertices call * to get the total number of triangles */ triangle_count get_vertex_counts(const graph_type::vertex_type& v) { return v.data().count; } /* * A saver which saves a file where each line is a vid / # triangles pair */ struct save_triangle_count{ std::string save_vertex(graph_type::vertex_type v) { triangle_count tc = v.data().count; double n_followed = v.num_out_edges(); double n_following = v.num_in_edges(); return graphlab::tostr(v.id()) + "\t" + graphlab::tostr(tc.in_triangles) + "\t" + graphlab::tostr(tc.out_triangles) + "\t" + graphlab::tostr(tc.through_triangles) + "\t" + graphlab::tostr(tc.cycle_triangles) + "\t" + graphlab::tostr(n_followed) + "\t" + graphlab::tostr(n_following) + "\n"; } std::string save_edge(graph_type::edge_type e) { return ""; } }; int main(int argc, char** argv) { std::cout << "This program counts the exact number of triangles in the " "provided graph.\n\n"; graphlab::command_line_options clopts("Exact Triangle Counting. " "Given a graph, this program computes the total number of triangles " "in the graph. An option (per_vertex) is also provided which " "computes for each vertex, the number of triangles it is involved in." "The algorithm assumes that each undirected edge appears exactly once " "in the graph input. If edges may appear more than once, this procedure " "will over count."); std::string prefix, format; std::string per_vertex; bool PER_VERTEX_COUNT = false; clopts.attach_option("graph", prefix, "Graph input. 
                       reads all graphs matching prefix*");
  clopts.attach_option("format", format, "The graph format");
  clopts.attach_option("ht", HASH_THRESHOLD,
                       "Above this size, hash tables are used");
  clopts.attach_option("per_vertex", per_vertex,
                       "If not empty, will count the number of "
                       "triangles each vertex belongs to and "
                       "save to file with prefix \"[per_vertex]\". "
                       "The algorithm used is slightly different "
                       "and thus will be a little slower");
  if(!clopts.parse(argc, argv)) return EXIT_FAILURE;
  // --graph and --format are mandatory
  if (prefix == "") {
    std::cout << "--graph is not optional\n";
    clopts.print_description();
    return EXIT_FAILURE;
  }
  else if (format == "") {
    std::cout << "--format is not optional\n";
    clopts.print_description();
    return EXIT_FAILURE;
  }
  if (per_vertex != "") PER_VERTEX_COUNT = true;
  // Initialize control plane using mpi
  graphlab::mpi_tools::init(argc, argv);
  graphlab::distributed_control dc;
  graphlab::launch_metric_server();
  // load graph
  graph_type graph(dc, clopts);
  graph.load_format(prefix, format);
  graph.finalize();
  dc.cout() << "Number of vertices: " << graph.num_vertices() << std::endl
            << "Number of edges: " << graph.num_edges() << std::endl;
  graphlab::timer ti;
  // create engine to count the number of triangles
  // (phase 1: gather neighborhoods, then count intersections per edge)
  dc.cout() << "Counting Triangles..." << std::endl;
  engine_type engine(dc, graph, clopts);
  engine.signal_all();
  engine.start();
  dc.cout() << "Counted in " << ti.current_time() << " seconds" << std::endl;
  dc.cout() << "Collecting results ...
" << std::endl; graphlab::synchronous_engine engine2(dc, graph, clopts); engine2.signal_all(); engine2.start(); if (PER_VERTEX_COUNT == false) { triangle_count count = graph.map_reduce_vertices(get_vertex_counts); dc.cout() << count.in_triangles << " In triangles\n"; dc.cout() << count.out_triangles << " Out triangles\n"; dc.cout() << count.through_triangles << " Through triangles\n"; dc.cout() << count.cycle_triangles << " Cycle triangles\n"; } else { dc.cout() << "Saving Results...\n"; dc.cout() << "Format is \n"; dc.cout() << " [vid] [in triangles] [out triangles] [through triangles] [cycle_triangles] [#out edges] [#in edges]" << std::endl; graph.save(per_vertex, save_triangle_count(), false, /* no compression */ true, /* save vertex */ false, /* do not save edge */ clopts.get_ncpus()); /* one file per machine */ } graphlab::stop_metric_server(); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main ================================================ FILE: toolkits/graph_analytics/eigen_vector_normalization.cpp ================================================ #include #include #include #include #include #include #include size_t num_data = 0; size_t current_cluster = 0; size_t last_updated_cluster = 0; size_t num_clusters = 0; //helper function to normalize a vector; void normalize_eivec(std::vector& vec) { float sum = 0.0; for (size_t i = 0; i < vec.size(); ++i) { sum += vec[i] * vec[i]; } sum = sqrt(sum); for (size_t i = 0; i < vec.size(); ++i) { vec[i] /= sum; } } struct evec_vertex_data { std::vector vec; evec_vertex_data() : vec(num_clusters, 0.0) { } void save(graphlab::oarchive& oarc) const { oarc << vec; } void load(graphlab::iarchive& iarc) { iarc >> vec; } }; struct evec_edge_data { float val; evec_edge_data() : val(0.0) { } explicit evec_edge_data(float in_val) : val(in_val) { } void save(graphlab::oarchive& oarc) const { oarc << val; } void load(graphlab::iarchive& iarc) { iarc >> val; } }; typedef graphlab::distributed_graph 
evec_graph_type; // Read a line from a file and add values to vertices bool evec_line_parser(evec_graph_type& graph, const std::string& filename, const std::string& textline) { std::stringstream strm(textline); size_t vid = 0; strm >> vid; float val = 0.0; size_t colcount = 0; while(strm >> val) { if (current_cluster + colcount >= num_clusters){ break; } graph.add_edge(vid, num_data + current_cluster + colcount + 1, evec_edge_data(val)); colcount++; } last_updated_cluster = current_cluster + colcount; return true; } //gather values belonging to the same data struct id_and_val { std::vector ids; std::vector vals; id_and_val() : ids(), vals() { } id_and_val(size_t id, float val) : ids(), vals() { ids.push_back(id); vals.push_back(val); } id_and_val& operator+=(const id_and_val& other) { for (size_t i = 0; i < other.ids.size(); ++i) { ids.push_back(other.ids[i]); vals.push_back(other.vals[i]); } return *this; } void save(graphlab::oarchive& oarc) const { size_t num = ids.size(); oarc << num; for (size_t a = 0; a < num; ++a) oarc << ids[a]; for (size_t a = 0; a < num; ++a) oarc << vals[a]; } void load(graphlab::iarchive& iarc) { ids.clear(); vals.clear(); size_t num = 0; iarc >> num; for (size_t a = 0; a < num; ++a) { size_t id = 0; iarc >> id; ids.push_back(id); } for (size_t a = 0; a < num; ++a) { float val = 0; iarc >> val; vals.push_back(val); } } }; //gather values belonging to the same data class aggregate_values: public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: //gather on out edges edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::OUT_EDGES; } id_and_val gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return id_and_val(edge.target().id() - num_data - 1, edge.data().val); } //get values and make a vector void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { const std::vector& ids = total.ids; const std::vector& vals = 
total.vals; for (size_t i = 0; i < ids.size(); ++i) { vertex.data().vec[ids[i]] = vals[i]; } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { } }; void normalize_eigen_vector(evec_graph_type::vertex_type& v) { normalize_eivec(v.data().vec); } class evec_graph_writer { public: std::string save_vertex(evec_graph_type::vertex_type v) { if(v.id()>num_data) return""; std::stringstream strm; strm << v.id() << " "; const std::vector& vec = v.data().vec; for (size_t i = 0; i < vec.size(); ++i) { if (i != 0) strm << " "; strm << vec[i]; } strm << "\n"; return strm.str(); } std::string save_edge(evec_graph_type::edge_type e) { return ""; } }; //read and normalize eigen vectors int main(int argc, char** argv) { std::string datafile; size_t rank = 0; //parse command line graphlab::command_line_options clopts( "Normalize eigen vectors for graph partitioning"); clopts.attach_option("data", datafile, "Input file prefix"); clopts.attach_option("rank", rank, "Rank of Lanczos method"); clopts.attach_option("clusters", num_clusters, "Number of clusters"); clopts.attach_option("data-num", num_data, "Number of data points"); if (!clopts.parse(argc, argv)) return EXIT_FAILURE; if (datafile == "") { std::cout << "--data is not optional\n"; return EXIT_FAILURE; } if (rank == 0) { std::cout << "--rank is not optional\n"; return EXIT_FAILURE; } if (num_clusters == 0) { std::cout << "--clusters is not optional\n"; return EXIT_FAILURE; } if (num_data == 0) { std::cout << "--data-num is not optional\n"; return EXIT_FAILURE; } //load and normalize eigen vectors graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; evec_graph_type graph(dc, clopts); //special vertices corresponding to clusters for (size_t i = 0; i < num_clusters; ++i) { graph.add_vertex(num_data + i + 1, evec_vertex_data()); } for (size_t i = 0; i < 
num_clusters; ++i) { //current_cluster = i; std::stringstream vec_filename; vec_filename << datafile; vec_filename << ".U."; vec_filename << i+1; vec_filename << "_"; graph.load(vec_filename.str(), evec_line_parser); current_cluster = last_updated_cluster + 1; if (current_cluster >= num_clusters) { break; } } graph.finalize(); graphlab::omni_engine engine(dc, graph, "sync", clopts); engine.signal_all(); engine.start(); graph.transform_vertices(normalize_eigen_vector); graph.save(datafile + ".compressed", evec_graph_writer(), false, true, false, 1); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } ================================================ FILE: toolkits/graph_analytics/format_convert.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include int main(int argc, char** argv) { // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); size_t powerlaw = 0; std::string ingraph, informat; std::string outgraph, outformat; bool gzip = true; // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("Graph Format Conversion.", true); clopts.attach_option("powerlaw", powerlaw, "Generates a synthetic powerlaw graph with this many " "vertices. If set, ingraph, and informat are ignored"); clopts.attach_option("ingraph", ingraph, "The input graph file. Required "); clopts.attach_option("informat", informat, "The input graph file format"); clopts.attach_option("outgraph", outgraph, "The output graph file. Required "); clopts.attach_option("outformat", outformat, "The output graph file format"); clopts.attach_option("outgzip", gzip, "If output is to be gzip compressed"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." 
<< std::endl; return EXIT_FAILURE; } if (powerlaw==0 && (ingraph.length() == 0 || outgraph.length() == 0)) { clopts.print_description(); return EXIT_FAILURE; } typedef graphlab::distributed_graph graph_type; graph_type graph(dc, clopts); dc.cout() << "Loading graph in format: "<< ingraph << std::endl; if (powerlaw) { graph.load_synthetic_powerlaw(powerlaw, false, 2.1, 100000000 /*max degree*/); } else { graph.load_format(ingraph, informat); } graph.finalize(); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; graph.save_format(outgraph, outformat, gzip); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main ================================================ FILE: toolkits/graph_analytics/graph_analytics.dox ================================================ /** \page graph_analytics Graph Analytics \brief The graph analytics toolkit contains applications for performing graph analytics and extracting patterns from the graph structure. The toolkit current contains: - \ref graph_analytics_format_conversion "Graph Format Conversion" - \ref graph_analytics_triangle_undirected "Triangle Counting (undirected)" - \ref graph_analytics_triangle_directed "Triangle Counting (directed)" - \ref graph_analytics_pagerank "PageRank" - \ref graph_analytics_kcore "KCore Decomposition" - \ref graph_analytics_connected_component "Connected Component" - \ref graph_analytics_approximate_diameter "Approximate Diameter" - \ref graph_analytics_partitioning "Graph Partitioning" - \ref graph_coloring "Graph Coloring" - \ref graph_analytics_total_subgraph_centrality "Total Subgraph Centrality" All toolkits take any of the graph formats described in \ref graph_formats . \section graph_analytics_format_conversion Format Conversion This is primarily a utility program, providing conversion between any of the Portable Graph formats described in \ref graph_formats. 
To run: \verbatim > ./format_convert --ingraph=[input graph location] --informat=[input format type] --outgraph=[output graph location] --outformat[output format type] \endverbatim The output is by default gzip compressed. To disable, add the option, \verbatim --outgzip=0 \endverbatim This program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./format_convert .... \endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \section graph_analytics_triangle_undirected Undirected Triangle Counting The undirected triangle counting program can count the total number of triangles in a graph, and can also, with little more time, count the number of triangles passing through each vertex in the graph. It implements the edge-iterator algorithm described in T. Schank. Algorithmic Aspects of Triangle-Based Network Analysis. Phd in computer science, University Karlsruhe, 2007. with several optimizations. The input to the system is a graph in any of the Portable graph format described in \ref graph_formats. It is important that the input be "cleaned" and that reverse edges are removed: i.e. if edge 1-->5 exists, edge 5-->1 should not exist. (The program will run without these edge removed. But numbers may be erroneous). To count the total number of triangles in a graph, the minimal set of options required are: \verbatim > ./undirected_triangle_count --graph=[graph prefix] --format=[format] \endverbatim Output looks like: \verbatim Number of vertices: 875713 Number of edges: 4322051 Counting Triangles... 
INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 875713 INFO: synchronous_engine.hpp(start:1307): Running Aggregators Counted in 1.16463 seconds 13391903 Triangles \endverbatim To count the number of triangles on each vertex, the minimal set of options are: \verbatim > ./undirected_triangle_count --graph=[graph prefix] --format=[format] --per_vertex=[output prefix] \endverbatim Tne output prefix is where the output counts will be written. This may be located on HDFS. For instance, if the output_prefix is "v_out", the output files will be written to: \verbatim v_out_1_of_16 v_out_2_of_16 ... v_out_16_of_16 \endverbatim Each line in the output file contains two numbers: a Vertex ID, and the number of triangles intersecting the vertex. This program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./undirected_triangle_count .... \endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \subsection Options Relevant options are: \li \b --graph (Required). The prefix from which to load the graph data \li \b --format (Required). The format of the input graph \li \b --per_vertex (Optional. Default ""). If set, will write the output counts. \li \b --ncpus (Optional. Default 2) The number of processors that will be used for computation. \li \b --ht (Optional. Default 64) The implementation uses a mix of vectors and hash sets to optimize set intersection computation. This parameter sets the capacity limit below which, vectors are used, and above which, hash sets are used. \li \b –-graph_opts (Optional, Default empty) Any additional graph options. See graphlab::distributed_graph a list of options. 
\section graph_analytics_triangle_directed Directed Triangle Counting The directed triangle counting program counts the total number of directed triangles in a graph of each type, and can also output the number of triangles of each type passing through each vertex in the graph. We show the 4 possible types of triangles here: In each case, the vertex being evaluated is the green vertex labeled "A". A dotted edge means that the direction of the edge do not matter. Triangle Name | Triangle Pattern ------------------|---------------------------- In Triangle | \image html in_triangle.gif Out Triangle | \image html out_triangle.gif Through Triangle | \image html through_triangle.gif Cycle Triangle | \image html cycle_triangle.gif The input to the system is a graph in any of the Portable graph format described in \ref graph_formats. To count the total number of triangles in a graph, the minimal set of options required are: \verbatim > ./directed_triangle_count --graph=[graph prefix] --format=[format] \endverbatim Output looks like this: \verbatim Number of vertices: 875713 Number of edges: 5105039 Counting Triangles... INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 875713 INFO: synchronous_engine.hpp(start:1307): Running Aggregators Counted in 1.962 seconds Collecting results ... INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 875713 INFO: synchronous_engine.hpp(start:1307): Running Aggregators 28198954 In triangles 28198954 Out triangles 28198954 Through triangles 11669313 Cycle triangles \endverbatim Observe that the number of In, Out and Through triangles are identical. This is because every In-triangle necessarily forms one Out and one Through triangle, (and similarly for the rest). Also the number of Cycle Triangles must be divisible by 3 since every cycle is counted 3 times, once on each vertex in the cycle. 
To count the number of triangles on each vertex, the minimal set of options are: \verbatim > ./directed_triangle_count --graph=[graph prefix] --format=[format] --per_vertex=[output prefix] \endverbatim Tne output prefix is where the output counts will be written. This may be located on HDFS. For instance, if the output_prefix is "v_out", the output files will be written to: \verbatim v_out_1_of_16 v_out_2_of_16 ... v_out_16_of_16 \endverbatim Each line in the output file has the following format: \verbatim [vid] [in triangles] [out triangles] [through triangles] [cycle_triangles] [#out edges] [#in edges] \endverbatim This program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./directed_triangle_count .... \endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \subsection Options Relevant options are: \li \b --graph (Required). The prefix from which to load the graph data \li \b --format (Required). The format of the input graph \li \b --per_vertex (Optional. Default ""). If set, will write the output counts. \li \b --ncpus (Optional. Default 2) The number of processors that will be used for computation. \li \b --ht (Optional. Default 64) The implementation uses a mix of vectors and hash sets to optimize set intersection computation. This parameter sets the capacity limit below which, vectors are used, and above which, hash sets are used. \li \b -–graph_opts (Optional, Default empty) Any additional graph options. See graphlab::distributed_graph a list of options. \section graph_analytics_pagerank PageRank The PageRank program computes the pagerank of each vertex. See the Wikipedia article for details of the algorithm. 
\subsection Input Graph The input to the system is a graph in any of the Portable graph format described in \ref graph_formats. \verbatim > ./pagerank --graph=[graph prefix] --format=[format] \endverbatim Alternatively, a synthetic power law graph of an arbitrary number of vertices can be generated using: \verbatim > ./pagerank --powerlaw=[nvertices] \endverbatim The resultant graph will have powerlaw out-degree, and nearly constant in-degree. The actual generation process draws vertex degree from a truncated power-law distribution with alpha=2.1. The distribution is truncated at maximum out-degree 100M to avoid allocating massive amounts of memory for creating the sampling distribution. \subsection Computation Type There are several modes of computation that are supported. All will eventually obtain the same solutions. ### Classical To get classical PageRank iterations, adding the option \verbatim > --iterations=[N Iterations] \endverbatim ### Dynamic Synchronous (default) The dynamic synchronous computation only performs computation on vertices that have not yet converged to the desired tolerance. The default tolerance is 0.001. This can be modified by adding the option \verbatim > --tol=[tolerance] \endverbatim ### Dynamic Asynchronous The dynamic asynchronous computation only performs computation on vertices that have not yet converged to the desired tolerance. This uses the asynchronous engine. The default tolerance is 0.001. This can be modified by adding the option \verbatim > --tol=[tolerance] \endverbatim \note This is known to be slow! PageRank does not benefit from the consistency guaranteed by the asynchronous engine. A new engine is in development with weaker consistency semantics, but sufficient for pagerank. \subsection Output To save the resultant pagerank of each vertex, include the option \verbatim > --saveprefix=[output prefix] \endverbatim Tne output prefix is where the output counts will be written. This may be located on HDFS. 
For instance, if the output_prefix is "v_out", the output files will be written to: \verbatim v_out_1_of_16 v_out_2_of_16 ... v_out_16_of_16 \endverbatim Each line in the output file contains two numbers: a Vertex ID, and the computed PageRank. Note that the output vector is NOT normalized, i.e. the computed entries do not sum to one. This program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./pagerank .... \endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \subsection Options Relevant options are: \li \b --graph (Optional). The prefix from which to load the graph data \li \b --format (Optional). The format of the input graph \li \b --powerlaw (Optional. Default 0). If set, generates synthetic powerlaw graph with the specified number of vertices. \li \b --saveprefix (Optional. Default ""). If set, will write the output counts. \li \b --tol (Optional. Default=1E-3). Changes the convergence tolerance for the Dynamic computation modes. \li \b --iterations (Optional. Default 0). If set, runs classical PageRank iterations for the specified number of iterations. \li \b --graph_opts (Optional, Default empty) Any additional graph options. See graphlab::distributed_graph for a list of options. \li \b --ncpus (Optional. Default 2) The number of processors that will be used for computation. \li \b --engine (Optional, Default "synchronous") Sets the engine type. Must be either "synchronous" or "asynchronous" \li \b --engine_opts (Optional, Default empty) Sets the engine options. Available options depend on the engine type. See graphlab::async_consistent_engine and graphlab::synchronous_engine for details. \section graph_analytics_kcore KCore Decomposition This program iteratively finds the KCore of the network.
\subsection Input Graph The input to the system is a graph in any of the Portable graph format described in \ref graph_formats. \verbatim > ./kcore --graph=[graph prefix] --format=[format] \endverbatim Output may look like: \verbatim K=0: #V = 875713 #E = 4322051 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 0 K=1: #V = 875713 #E = 4322051 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 153407 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=2: #V = 711870 #E = 4160100 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 108715 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=3: #V = 581712 #E = 3915291 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 69907 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=4: #V = 492655 #E = 3668104 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 52123 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=5: #V = 424155 #E = 3416251 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 41269 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=6: #V = 367361 #E = 3158776 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 33444 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=7: #V = 319194 #E = 2902138 INFO: synchronous_engine.hpp(start:1213): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1257): Active vertices: 29201 INFO: synchronous_engine.hpp(start:1307): Running Aggregators K=8: #V = 274457 #E = 2629033 ...... 
\endverbatim To just get the informative lines: \verbatim > ./kcore --graph=[graph prefix] --format=[format] > k_out.txt ... > cat k_out.txt Computes a k-core decomposition of a graph. Number of vertices: 875713 Number of edges: 4322051 K=0: #V = 875713 #E = 4322051 K=1: #V = 875713 #E = 4322051 K=2: #V = 711870 #E = 4160100 K=3: #V = 581712 #E = 3915291 K=4: #V = 492655 #E = 3668104 K=5: #V = 424155 #E = 3416251 K=6: #V = 367361 #E = 3158776 K=7: #V = 319194 #E = 2902138 K=8: #V = 274457 #E = 2629033 K=9: #V = 231775 #E = 2335154 K=10: #V = 193406 #E = 2040738 K=11: #V = 159020 #E = 1753273 K=12: #V = 131362 #E = 1500517 K=13: #V = 106572 #E = 1256952 K=14: #V = 86302 #E = 1047053 K=15: #V = 68409 #E = 849471 K=16: #V = 53459 #E = 676076 K=17: #V = 40488 #E = 519077 ... \endverbatim The program can also save a copy of the graph at each stage by adding an option. \verbatim > --savecores=[prefix] \endverbatim The resultant graphs will be saved with prefixes [prefix].K For instance if prefix is out, The 0-Core graph may be saved in \verbatim out.0.1_of_4 out.0.2_of_4 out.0.3_of_4 out.0.4_of_4 \endverbatim The 5-Core graph will be saved in \verbatim out.5.1_of_4 out.5.2_of_4 out.5.3_of_4 out.5.4_of_4 \endverbatim and so on. The range of k-Core graphs to compute can be controlled by the kmin and the kmax option described below. This program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./kcore.... \endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \subsection Options Relevant options are: \li \b --graph (Required). The prefix from which to load the graph data \li \b --format (Required). The format of the input graph \li \b --ncpus (Optional. Default 2) The number of processors that will be used for computation. 
\li \b --savecores (Optional. Default ""). The target prefix to save the resultant K-core graphs. \li \b --kmin (Optional. Default 0). Only output result for the K-core graph starting at K=kmin \li \b --kmax (Optional. Default Inf). Only output result for the K-core graph up to K=kmax \section graph_analytics_triangle_coloring Graph Coloring The graph coloring program implements a really simple graph coloring procedure: each vertex reads the colors of its neighbors and takes on the smallest possible color which does not conflict with its neighbors. The procedure necessarily uses the asynchronous engine (it will never converge with the synchronous engine). The input to the system is a graph in any of the Portable graph formats described in \ref graph_formats. It is important that the input be "cleaned" and that reverse edges are removed: i.e. if edge 1-->5 exists, edge 5-->1 should not exist. (The program will run without these edges removed. But numbers may be erroneous). To color a graph, the minimal set of options required are: \verbatim > ./simple_coloring --graph=[graph prefix] --format=[format] --output=[output prefix] \endverbatim Output looks like: \verbatim Number of vertices: 875713 Number of edges: 5105039 Coloring... Completed Tasks: 875713 Issued Tasks: 875713 Blocked Issues: 0 ------------------ Joined Tasks: 0 Colored in 42.3684 seconds Metrics server stopping. \endverbatim Observe that the number of Completed Tasks is identical to the number of vertices. This is a result of the consistency model which ensures that the entire vertex update is performed "atomically". The output prefix is where the output counts will be written. This may be located on HDFS. For instance, if the output_prefix is "v_out", the output files will be written to: \verbatim v_out_1_of_16 v_out_2_of_16 ... v_out_16_of_16 \endverbatim Each line in the output file contains two numbers: a Vertex ID, and the color of the vertex.
This program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./simple_coloring .... \endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \subsection Options Relevant options are: \li \b --graph (Required). The prefix from which to load the graph data \li \b --format (Required). The format of the input graph \li \b --ncpus (Optional. Default 2) The number of processors that will be used for computation. \li \b –-graph_opts (Optional, Default empty) Any additional graph options. See --graph_help a list of options. \li \b –-engine_opts (Optional, Default empty) Any additional engine options. See --engine_help a list of options. A particularly relevant option is \verbatim --engine_opts="factorized=true" \endverbatim This uses a weaker consistency setting which only guarantees that individual "gather/apply/scatter" operations are atomic, but does not guarantee atomicity of the entire update. As a result, this may require more updates to complete, but could in practice run significantly faster. \section graph_analytics_connected_component Connected Component The connected component program can find all connected components in a graph, and can also count the number of vertices (size) of each connected component. The input to the system is a graph in any of the Portable Graph formats described in \ref graph_formats. 
To find connected components in a graph, the minimal set of options required are: \verbatim > ./connected_component --graph=[graph prefix] --format=[format] \endverbatim Here is a toy example: a graph with 6 nodes and 5 edges: \verbatim # example graph # vertices: 6 edges: 5 1 2 2 3 4 5 4 6 5 6 \endverbatim Assuming the file name is toy_graph, the command used for running connected components is \verbatim > ./connected_component --graph=toy_graph --format=tsv --saveprefix=out \endverbatim When you set --saveprefix=output_prefix, the pairs of a Vertex ID and a Component ID will be written to a sequence of files with prefix output_prefix. This may be located on HDFS. For instance, if the output_prefix is "v_out", the output files will be written to: \verbatim out_1_of_4 out_2_of_4 out_3_of_4 out_4_of_4 \endverbatim Let's examine the output. The first column is the node id, while the second column is its assigned component number (which is also the lowest node id in this component). In our case: \verbatim 1,1 2,1 3,1 4,4 5,4 6,4 \endverbatim There are two components. The first component is 1,2,3 and the second component is 4,5,6. Note that this program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./connected_component .... \endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \subsection Options Relevant options are: \li \b --graph (Required). The prefix from which to load the graph data \li \b --format (Required). The format of the input graph \li \b --saveprefix (Optional). If set, pairs of a Vertex ID and a Component ID will be saved to a sequence of files with the given prefix. \li \b --ncpus (Optional. Default 2). The number of processors that will be used for computation. \li \b --graph_opts (Optional, Default empty).
Any additional graph options. See graphlab::distributed_graph a list of options. connected_components_stats is a helper utility, which computes histogram of component sizes. Using our toy example \verbatim > ./connected_component_stats --graph=out Connected Component INFO: mpi_tools.hpp(init:63): MPI Support was not compiled. INFO: dc.cpp(init:573): Cluster of 1 instances created. INFO: distributed_graph.hpp(set_ingress_method:3200): Automatically determine ingress method: grid Loading graph in format: adj INFO: distributed_graph.hpp(load_from_posixfs:2189): Loading graph from file: ./out_1_of_4 INFO: distributed_graph.hpp(load_from_posixfs:2189): Loading graph from file: ./out_2_of_4 INFO: distributed_graph.hpp(load_from_posixfs:2189): Loading graph from file: ./out_3_of_4 INFO: distributed_graph.hpp(load_from_posixfs:2189): Loading graph from file: ./out_4_of_4 INFO: distributed_ingress_base.hpp(finalize:185): Finalizing Graph... INFO: distributed_ingress_base.hpp(exchange_global_info:519): Graph info: nverts: 2 nedges: 0 nreplicas: 2 replication factor: 1 Complete Finalization in 0.001965 graph calculation time is 2.4e-05 sec RESULT: size count 3 2 \endverbatim As expected, there are two components of size 3. \section graph_analytics_approximate_diameter Approximate Diameter The approximate diameter program can estimate a diameter of a graph. The implemented algorithm is based on the work, U Kang, Charalampos Tsourakakis, Ana Paula Appel, Christos Faloutsos and Jure Leskovec, HADI: Fast Diameter Estimation and Mining in Massive Graphs with Hadoop (2008). The input to the system is a graph in any of the Portable Graph formats described in \ref graph_formats. 
To compute an approximate diameter of a graph, the minimal set of options required are: \verbatim > ./approximate_diameter --graph=[graph prefix] --format=[format] \endverbatim Output looks like: \verbatim Approximate graph diameter INFO: synchronous_engine.hpp(start:1263): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1312): Active vertices: 1271950 INFO: synchronous_engine.hpp(start:1361): Running Aggregators 1-th hop: 12895307 vertex pairs are reached INFO: synchronous_engine.hpp(start:1263): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1312): Active vertices: 1271950 INFO: synchronous_engine.hpp(start:1361): Running Aggregators 2-th hop: 319726269 vertex pairs are reached INFO: synchronous_engine.hpp(start:1263): 0: Starting iteration: 0 INFO: synchronous_engine.hpp(start:1312): Active vertices: 1271950 INFO: synchronous_engine.hpp(start:1361): Running Aggregators 3-th hop: 319769151 vertex pairs are reached converge graph calculation time is 40 sec approximate diameter is 2 \endverbatim This program can also run distributed by using \verbatim > mpiexec -n [N machines] --hostfile [host file] ./approximate_diameter .... \endverbatim See your MPI documentation for details on how to launch this job. All machines must have access to the input graph location and the output graph location. Graphs may be on HDFS. If you have problems loading HDFS files, see the \ref FAQ. \subsection Options Relevant options are: \li \b --graph (Required). The prefix from which to load the graph data \li \b --format (Required). The format of the input graph \li \b --tol (Optional. Default=1E-4). Changes the convergence tolerance for the number of reached vertex pairs at each hop. \li \b --use-sketch (Optional. Default=1). If true, will use Flajolet & Martin bitmask to approximately count numbers of reached vertex pairs, and will require a smaller memory. If false, will count exact numbers of reached vertex pairs. 
But this will need a huge memory and be slow. \li \b --ncpus (Optional. Default 2). The number of processors that will be used for computation. \li \b --graph_opts (Optional, Default empty). Any additional graph options. See graphlab::distributed_graph a list of options. \section graph_analytics_partitioning Graph Partitioning This program can partition a graph by using normalized cut. The input to the system is a graph in any of the Portable Graph formats described in \ref graph_formats. You can also give weights to edges with the weight format. For instance in this weight format file, there are 5 edges: \verbatim 1 2 4.0 2 3 1.0 3 4 5.0 4 5 2.0 5 3 3.0 \endverbatim To partition a graph, the minimal set of options required are: \verbatim > ./partitioning --graph=[graph prefix] --format=[format] \endverbatim This program uses svd in Graphlab Collaborative Filtering Toolkit and kmeans in Graphlab Clustering Toolkit. The paths to the directories are specified by --svd-dir and --kmeans-dir, respectively. The program will create some intermediate files. The final partitioning result is written in files named [graph prefix].result with suffix, for example [graph prefix].result_1_of_4. The partitioning result data consists of two columns: one for the ids and the other for the assigned partitions. For instance: \verbatim 1 0 2 0 3 1 4 1 5 1 \endverbatim NOTE: To run this program in a distributed setting, you must use the "mpi-args" option, not like other graphlab toolkits. The graph partitioning calls other graphlab programs. When "--mpi-args" is set, these graphlab programs are called with "mpiexec" and the string written after the "mpi-args" option. For example, if you set --mpi-args="-n 4 --hostfile host", the program calls the other graphlab programs with "mpiexec -n 4 --hostfile host". \subsection Options Relevant options are: \li \b --graph (Required). The prefix from which to load the graph data \li \b --format (Required). The format of the input graph. 
If "weight" is set, the program will read the data file where each line holds [id1] [id2] [weight]. \li \b --partitions (Optional. Default 2). The number of partitions \li \b --svd-dir (Optional. Default ../collaborative_filtering/). Path to the directory where Graphlab svd is located \li \b --kmeans-dir (Optional. Default ../clustering/). Path to the directory where Graphlab kmeans is located \li \b --ncpus (Optional. Default 2). The number of processors that will be used for computation. \li \b --graph_opts (Optional, Default empty). Any additional graph options. See graphlab::distributed_graph for a list of options. \li \b --mpi-args (Optional, Default empty). If set, will execute mpiexec with the given string. \section graph_analytics_total_subgraph_centrality "Total Subgraph Centrality" Total subgraph centrality was implemented by Jacob Kesinger, see additional details in his blog post. Total Subgraph Communicability is a new centrality measure due to Benzi & Klymko [1]. For a directed graph with adjacency matrix A, \verbatim TSC_i = sum_j exp(A)_{ij} = (exp(A)*1)_i. \endverbatim This code calculates the TSC using an Arnoldi iteration on the Krylov subspace {b, Ab, A*Ab, A*A*Ab, ...} due to Saad [2], and using the new warp engine from Graphlab 2.2 (without which this would have been, at best, very challenging). Small components of large graphs will have bogus answers due to floating point issues. To find the exact TSC for a particular node i, run with "--column i" to find exp(A)*e_i; you will have to sum the resulting output yourself, however. SAMPLE INPUT: \verbatim 0 1 1 2 1 3 2 4 3 4 1 0 2 1 3 1 4 2 4 3 \endverbatim OUTPUT: \verbatim 0 5.17784 1 10.3319 2 8.49789 3 8.49789 4 7.96807 \endverbatim You can verify this in python as: \verbatim import scipy import scipy.linalg A = scipy.array([[0,1,0,0,0],[1,0,1,1,0],[0,1,0,0,1],[0,1,0,0,1],[0,0,1,1,0]]) scipy.linalg.expm2(A).sum(axis=1) \endverbatim [1]: Benzi, Michele, and Christine Klymko.
Total Communicability as a Centrality Measure. ArXiv e-print, February 27, 2013. arxiv [2]: Saad, Yousef. “Analysis of Some Krylov Subspace Approximations to the Matrix Exponential Operator.” SIAM Journal on Numerical Analysis 29, no. 1 (1992): 209–228. */ ================================================ FILE: toolkits/graph_analytics/graph_laplacian.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include #include #include std::vector vertex_degrees; struct vdata{ float degree; vdata() : degree(0.0){} void save(graphlab::oarchive& oarc) const { oarc << degree; } void load(graphlab::iarchive& iarc) { iarc >> degree; } }; struct edata{ float weight; edata() : weight(1.0){} explicit edata(const float in_w): weight(in_w){} void save(graphlab::oarchive& oarc) const { oarc << weight; } void load(graphlab::iarchive& iarc) { iarc >> weight; } }; typedef graphlab::distributed_graph graph_type; // [vertex_id1] [vertex_id2] [weight] // NOTE: vertex id should start from 1. 
// NOTE(review): the template argument lists (e.g. on ivertex_program,
// distributed_graph, omni_engine) appear to have been stripped from this
// copy of the file — presumably ivertex_program<graph_type, float> and
// omni_engine<add_rows>; confirm against the repository original.

// Parse one line of a "weight" format file: [vertex_id1] [vertex_id2] [weight].
// Self-loops (source == target) are silently dropped. Always returns true,
// i.e. malformed lines are not reported as errors.
bool line_parser(graph_type& graph, const std::string& filename, const std::string& textline) {
  std::stringstream strm(textline);
  size_t source = 0;
  size_t target = 0;
  float weight = 0.0;
  strm >> source;
  strm.ignore(1);  // skip the single separator character between fields
  strm >> target;
  strm.ignore(1);
  strm >> weight;
  if(source != target) graph.add_edge(source, target, edata(weight));
  return true;
}

//calculate vertex degree
// Vertex program: sums the weights of ALL incident edges of each vertex
// and stores the total in vertex.data().degree. No scatter phase.
class add_rows: public graphlab::ivertex_program, public graphlab::IS_POD_TYPE {
public:
  add_rows() { }
  // No per-message initialization is needed.
  void init(icontext_type& context, const vertex_type& vertex, const message_type& msg) { }
  //gather on all the edges
  edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  }
  //for each edge gather the weight of the edge
  float gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const {
    return edge.data().weight;
  }
  // Store the summed edge weights as the (weighted) vertex degree.
  // (The inverse square root is taken later by inverse_square_root().)
  void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) {
    vertex.data().degree = total;
  }
  edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const {
    return graphlab::NO_EDGES;
  }
  void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { }
};

//take inverse square root
// Transform each vertex degree d into 1/sqrt(d), i.e. a diagonal entry of D^{-1/2}.
void inverse_square_root(graph_type::vertex_type& v) {
  v.data().degree = 1.0 / sqrt(v.data().degree);
}

//multiply D^-1/2
// Scale each edge weight by the (already inverted-sqrt) degrees of both
// endpoints, producing entries of D^{-1/2} A D^{-1/2}.
void multiply_D(graph_type::edge_type& e) {
  const float& d1 = e.source().data().degree;
  const float& d2 = e.target().data().degree;
  e.data().weight = e.data().weight * d1 * d2;
}

//needed for normalization for ratio cut
// Commutative max-reduction over vertex degrees; save/load make it usable
// in a distributed map_reduce_vertices.
struct max_degree{
  float degree;
  max_degree(): degree(0.0){}
  explicit max_degree(float in_degree): degree(in_degree){}
  max_degree& operator+=(const max_degree& other){
    degree = std::max(degree, other.degree);
    return *this;
  }
  void save(graphlab::oarchive& oarc) const { oarc << degree; }
  void load(graphlab::iarchive& iarc) { iarc >> degree; }
};
// Map function: lift a vertex's degree into the max_degree reducer.
max_degree create_max_degree(const graph_type::vertex_type& vertex) {
  return max_degree(vertex.data().degree);
}

//for normalization for ratio cut
float normalize_factor = 1.0;
// Divide every edge weight by the global normalization factor.
void normalize_weight(graph_type::edge_type& e) {
  e.data().weight /= normalize_factor;
}
// Divide every vertex degree by the global normalization factor.
void normalize_degree(graph_type::vertex_type& v) {
  v.data().degree /= normalize_factor;
}

// Commutative max-reduction over vertex ids; used below to recover the
// largest vertex id, which is written out as the number of data points.
struct max_vid{
  size_t vid;
  max_vid(): vid(0){}
  explicit max_vid(size_t in_vid): vid(in_vid){}
  max_vid& operator+=(const max_vid& other){
    vid = std::max(vid, other.vid);
    return *this;
  }
  void save(graphlab::oarchive& oarc) const { oarc << vid; }
  void load(graphlab::iarchive& iarc) { iarc >> vid; }
};
// Map function: lift a vertex's id into the max_vid reducer.
max_vid absolute_vertex_data(const graph_type::vertex_type& vertex) {
  return max_vid(vertex.id());
}

// Writer for the normalized-cut Laplacian: emits "vid vid 2.0" diagonal
// entries and both directions of every edge (to symmetrize the matrix).
// Vertex id 0 is skipped — ids are expected to start from 1.
class graph_writer_normalized_cut {
public:
  graph_writer_normalized_cut(){}
  std::string save_vertex(graph_type::vertex_type v) {
    std::stringstream strm;
    size_t vid = v.id();
    if(vid == 0) return "";
    strm << vid << " " << vid << " 2.0\n";
    return strm.str();
  }
  std::string save_edge(graph_type::edge_type e) {
    std::stringstream strm;
    size_t source = e.source().id();
    size_t target = e.target().id();
    float weight = e.data().weight;
    // emit both orientations so the resulting matrix is symmetric
    strm << source << " " << target << " " << weight << "\n";
    strm << target << " " << source << " " << weight << "\n";
    return strm.str();
  }
};

// Writer for the ratio-cut Laplacian: diagonal entries are
// "vid vid (5.0 - degree)"; edges are symmetrized as above.
// NOTE(review): the constant 5.0 is unexplained here — confirm its origin.
class graph_writer_ratio_cut {
public:
  graph_writer_ratio_cut(){}
  std::string save_vertex(graph_type::vertex_type v) {
    std::stringstream strm;
    size_t vid = v.id();
    if(vid == 0) return "";
    strm << vid << " " << vid << " " << 5.0 - v.data().degree << "\n";
    return strm.str();
  }
  std::string save_edge(graph_type::edge_type e) {
    std::stringstream strm;
    size_t source = e.source().id();
    size_t target = e.target().id();
    float weight = e.data().weight;
    strm << source << " " << target << " " << weight << "\n";
    strm << target << " " << source << " " << weight << "\n";
    return strm.str();
  }
};

// Entry point: load the graph, compute weighted vertex degrees with the
// add_rows vertex program, build the normalized-cut Laplacian (ratio cut
// is present but disabled), save it to <graph>.glap, and write the number
// of data points (max vertex id) to <graph>.datanum on rank 0.
int main(int argc, char** argv) {
  std::cout << "Construct graph Laplacian for graph partitioning.\n\n";
  //parse command line
  std::string graph_dir;
  std::string format = "adj";
  bool normalized_cut = true;
  bool ratio_cut = false;
  graphlab::command_line_options clopts
    ("Constructing graph Laplacian for graph partitioning");
  clopts.attach_option("graph", graph_dir,
                       "The graph file. This is not optional. Vertex ids must start from 1 "
                       "and must not skip any numbers.");
  clopts.attach_option("format", format,
                       "The graph file format. If \"weight\" is set, the program will read "
                       "the data file where each line holds [id1] [id2] [weight].");
  // clopts.attach_option("normalized-cut", normalized_cut,
  //                      "construct graph laplacian for normalized cut");
  // clopts.attach_option("ratio-cut", ratio_cut,
  //                      "construct graph laplacian for ratio cut");
  if(!clopts.parse(argc, argv)) return EXIT_FAILURE;
  if (graph_dir == "") {
    std::cout << "--graph is not optional\n";
    return EXIT_FAILURE;
  }
  // if(normalized_cut == true && ratio_cut == true){
  //   std::cout << "Both normalized-cut and ratio-cut are true. Ratio cut is selected.\n";
  //   normalized_cut = false;
  // }else if(normalized_cut == false && ratio_cut == false){
  //   std::cout << "Both normalized-cut and ratio-cut are false. Ratio cut is selected.\n";
  //   ratio_cut = true;
  // }

  //load graph
  graphlab::mpi_tools::init(argc, argv);
  graphlab::distributed_control dc;
  graph_type graph(dc);
  dc.cout() << "Loading graph in format: "<< format << std::endl;
  if(format == "weight") graph.load(graph_dir, line_parser);
  else graph.load_format(graph_dir, format);
  graph.finalize();

  time_t start, end;
  // compute weighted degrees, then transform them into D^{-1/2} A D^{-1/2}
  graphlab::omni_engine engine(dc, graph, "sync", clopts);
  engine.signal_all();
  time(&start);
  engine.start();
  if(normalized_cut == true){
    graph.transform_vertices(inverse_square_root);
    graph.transform_edges(multiply_D);
  }else if(ratio_cut == true){//normalize weight for ratio cut
    // normalize_factor = graph.map_reduce_vertices(create_max_degree).degree;
    // graph.transform_edges(normalize_weight);
    // graph.transform_vertices(normalize_degree);
  }
  time(&end);
  dc.cout() << "graph calculation time is " << (end - start) << " sec\n";
  dc.cout() << "writing...\n";
  const std::string outputname = graph_dir + ".glap";
  if(normalized_cut == true)
    graph.save( outputname, graph_writer_normalized_cut(),
                false, //set to true if each output file is to be gzipped
                true, //whether vertices are saved
                true);//whether edges are saved
  else if(ratio_cut == true)
    graph.save( outputname, graph_writer_ratio_cut(),
                false, //set to true if each output file is to be gzipped
                true, //whether vertices are saved
                true);//whether edges are saved
  size_t data_num = graph.map_reduce_vertices(absolute_vertex_data).vid;
  //#graphlab::mpi_tools::finalize();
  //write the number of data
  if (graphlab::mpi_tools::rank()==0) {
    const std::string datanum_filename = graph_dir + ".datanum";
    std::ofstream ofs(datanum_filename.c_str());
    if(!ofs) {
      std::cout << "can't create file for number of data" << std::endl;
      graphlab::mpi_tools::finalize();
      return EXIT_FAILURE;
    }
    ofs << data_num;
  }
  graphlab::mpi_tools::finalize();
  return EXIT_SUCCESS;
}

================================================ FILE: toolkits/graph_analytics/http/index.html
================================================ Twitter Triangle Counter Results

Twitter Triangle Count Results

Vertices:
42 Million
Edges:
1.5 Billion
Triangles:
188 Billion
================================================ FILE: toolkits/graph_analytics/http/make_jsons.m ================================================
% Build top_users.json from 'directed_triangles' (per-vertex output of the
% directed triangle counter): rank users by several measures and emit the
% top-10 of each ranking as a JSON array of {name, label, values} objects.
clear;
raw = importdata('directed_triangles');
%%
% Column layout of the raw per-vertex counts matrix.
col.vid = 1;
col.in = 3; % Triangles among people following you
col.out = 2; % Triangles among people you follow
col.through = 4;
col.cycle = 5;
col.followers = 6;
col.following = 7;
%% compute top by cycle
% Sort indices (descending) for each ranking criterion.
degree = raw(:, col.followers) + raw(:, col.following);
[~,ind.degree] = sort(degree, 'descend');
[~,ind.followers] = sort(raw(:,col.followers), 'descend');
[~,ind.cycle] = sort(raw(:,col.cycle), 'descend');
[~,ind.in] = sort(raw(:,col.in), 'descend');
[~,ind.out] = sort(raw(:,col.out), 'descend');
[~,ind.through] = sort(raw(:,col.through), 'descend');
%%
% total triangles / (degree + 1)  (+1 guards against division by zero)
cluster_coeff = sum(raw(:, [2,3,4,5]),2) ./ (degree + 1);
[~,ind.cluster_coeff] = sort(cluster_coeff, 'descend');
%%
% total triangles / (following + 1)
cluster_coeff2 = sum(raw(:, [2,3,4,5]),2) ./ (raw(:, col.following) +1);
[~,ind.cluster_coeff2] = sort(cluster_coeff2, 'descend');
%%
% through triangles / (degree + 1)
cluster_coeff3 = raw(:, col.through) ./ (degree + 1);
[~,ind.cluster_coeff3] = sort(cluster_coeff3, 'descend');
%%
% in triangles / (followers + 1)
cluster_coeff4 = raw(:, col.in) ./ (raw(:, col.followers) + 1);
[~,ind.cluster_coeff4] = sort(cluster_coeff4, 'descend');
%% Render json
% Each block below writes one ranking; sep switches the pair separator to a
% bare newline on the last entry so the emitted JSON list stays valid.
nusers = 10;
fid = fopen('top_users.json', 'w');
fprintf(fid, '[\n');
fprintf(fid, '\t { "name": "degree", "label": "Degree", "values": [\n');
for i = 1:nusers
  sep = ',\n';
  if(i == nusers)
    sep = '\n';
  end
  fprintf(fid, ['\t\t ["%d", "%d"]', sep], ...
          [raw(ind.degree(i), col.vid), degree(ind.degree(i))]);
end
fprintf(fid, '\t]},\n');
fprintf(fid, '\t { "name": "followers", "label": "Followers", "values": [\n');
for i = 1:nusers
  sep = ',\n';
  if(i == nusers)
    sep = '\n';
  end
  fprintf(fid, ['\t\t ["%d", "%d"]', sep], ...
          raw(ind.followers(i), [col.vid, col.followers]));
end
fprintf(fid, '\t]},\n');
fprintf(fid, '\t { "name": "cycle", "label": "Cycle Triangles", "values": [\n');
for i = 1:nusers
  sep = ',\n';
  if(i == nusers)
    sep = '\n';
  end
  fprintf(fid, ['\t\t ["%d", "%d"]', sep], ...
          raw(ind.cycle(i), [col.vid, col.cycle]));
end
fprintf(fid, '\t]},\n');
% (disabled) in / out / through triangle rankings:
% fprintf(fid, '\t { "name": "in", "label": "In Triangles", "values": [\n');
% for i = 1:nusers
%   sep = ',\n';
%   if(i == nusers)
%     sep = '\n';
%   end
%   fprintf(fid, ['\t\t ["%d", "%d"]', sep], ...
%           raw(ind.in(i), [col.vid, col.in]));
% end
% fprintf(fid, '\t]},\n');
%
%
% fprintf(fid, '\t { "name": "out", "label": "Out Triangles", "values": [\n');
% for i = 1:nusers
%   sep = ',\n';
%   if(i == nusers)
%     sep = '\n';
%   end
%   fprintf(fid, ['\t\t ["%d", "%d"]', sep], ...
%           raw(ind.out(i), [col.vid, col.out]));
% end
% fprintf(fid, '\t]},\n');
%
%
% fprintf(fid, '\t { "name": "through", "label": "Through Triangles", "values": [\n');
% for i = 1:nusers
%   sep = ',\n';
%   if(i == nusers)
%     sep = '\n';
%   end
%   fprintf(fid, ['\t\t ["%d", "%d"]', sep], ...
%           raw(ind.through(i), [col.vid, col.through]));
% end
% fprintf(fid, '\t]},\n');
%
fprintf(fid, '\t { "name": "cluster", "label": "Triangles / Degree", "values": [\n');
for i = 1:nusers
  sep = ',\n';
  if(i == nusers)
    sep = '\n';
  end
  fprintf(fid, ['\t\t ["%d", "%d"]', sep], ...
          [raw(ind.cluster_coeff(i), col.vid), cluster_coeff(ind.cluster_coeff(i))]);
end
fprintf(fid, '\t]},\n');
fprintf(fid, '\t { "name": "cluster2", "label": "Triangles / Following", "values": [\n');
for i = 1:nusers
  sep = ',\n';
  if(i == nusers)
    sep = '\n';
  end
  fprintf(fid, ['\t\t ["%d", "%d"]', sep], ...
          [raw(ind.cluster_coeff2(i), col.vid), cluster_coeff2(ind.cluster_coeff2(i))]);
end
fprintf(fid, '\t]},\n');
fprintf(fid, '\t { "name": "cluster3", "label": "Through Triangles / Degree", "values": [\n');
for i = 1:nusers
  sep = ',\n';
  if(i == nusers)
    sep = '\n';
  end
  fprintf(fid, ['\t\t ["%d", "%d"]', sep], ...
[raw(ind.cluster_coeff3(i), col.vid), cluster_coeff3(ind.cluster_coeff3(i))]); end fprintf(fid, '\t]},\n'); fprintf(fid, '\t { "name": "cluster4", "label": "In Triangles / Followers", "values": [\n'); for i = 1:nusers sep = ',\n'; if(i == nusers) sep = '\n'; end fprintf(fid, ['\t\t ["%d", "%d"]', sep], ... [raw(ind.cluster_coeff4(i), col.vid), cluster_coeff4(ind.cluster_coeff4(i))]); end fprintf(fid, '\t]}\n'); fprintf(fid, ']\n'); fclose(fid); ================================================ FILE: toolkits/graph_analytics/http/style.css ================================================ body { text-align: center; } #main_page { width: 900px; margin-left: auto; margin-right: auto; } #info { width: auto; } .setting { width: auto; display: inline-block; } #results { } .user_list { display: inline-block; border-style: solid; height: 600px; width: 250px; vertical-align: top; text-align: left; margin: 5px; } .title { padding: 10px; background: black; color: white; text-align: center; } .contents { padding: 10px; } .user_image { width: 50px; height: 50px; } .user { } .user_info { padding-left: 10px; display: inline-block; vertical-align: top; } .name { } .value { } ================================================ FILE: toolkits/graph_analytics/http/top_users.json ================================================ [ { "name": "degree", "label": "Degree", "values": [ ["16409683", "3081108"], ["19058681", "2997653"], ["15846407", "2679666"], ["813286", "2653045"], ["428333", "2450768"], ["19397785", "1994945"], ["783214", "1959765"], ["16190898", "1885917"], ["19757371", "1844564"], ["17461978", "1844123"] ]}, { "name": "followers", "label": "Followers", "values": [ ["19058681", "2997470"], ["15846407", "2679640"], ["16409683", "2674870"], ["428333", "2450750"], ["19397785", "1994930"], ["783214", "1959710"], ["16190898", "1885780"], ["813286", "1882890"], ["19757371", "1844500"], ["17461978", "1843560"] ]}, { "name": "cycle", "label": "Cycle Triangles", "values": [ 
["11915432", "104231123"], ["14389132", "102710493"], ["14669398", "97731164"], ["21836409", "95138730"], ["5210841", "94708939"], ["15117375", "91658795"], ["804455", "90986939"], ["17850012", "90488711"], ["15991049", "89806725"], ["11622712", "88501346"] ]}, { "name": "cluster", "label": "Triangles / Degree", "values": [ ["19176053", "4.484443e+03"], ["21804494", "4.474835e+03"], ["22903720", "4.459627e+03"], ["15489652", "4.455816e+03"], ["21773417", "4.455429e+03"], ["22924805", "4.454160e+03"], ["22707565", "4.445663e+03"], ["22447669", "4.442444e+03"], ["21772831", "4.441429e+03"], ["23062614", "4.428422e+03"] ]}, { "name": "cluster2", "label": "Triangles / Following", "values": [ ["19784831", "6449985"], ["18498684", "5.962666e+06"], ["16303106", "5607368"], ["24285686", "5272692"], ["23832022", "4483823"], ["780457", "3564740"], ["21499292", "3207562"], ["19397785", "2.936561e+06"], ["17220934", "2.488950e+06"], ["428333", "2.474015e+06"] ]}, { "name": "cluster3", "label": "Through Triangles / Degree", "values": [ ["21804494", "1.183061e+03"], ["15489652", "1.177176e+03"], ["22903720", "1.175795e+03"], ["21773417", "1.175569e+03"], ["21772831", "1.173847e+03"], ["22447669", "1.173059e+03"], ["17232810", "1.165794e+03"], ["23062614", "1.165705e+03"], ["14995603", "1.164518e+03"], ["7967282", "1.162090e+03"] ]}, { "name": "cluster4", "label": "In Triangles / Followers", "values": [ ["15143707", "2.230469e+03"], ["22903720", "2.202895e+03"], ["31083199", "2.201453e+03"], ["19955829", "2.198339e+03"], ["22708071", "2.195993e+03"], ["15135065", "2.177517e+03"], ["21772965", "2.176703e+03"], ["24953387", "2.171151e+03"], ["22447669", "2.164331e+03"], ["22343399", "2.161766e+03"] ]} ] ================================================ FILE: toolkits/graph_analytics/http/twitter_triangles.js ================================================ google.load("jquery", "1.5"); var domain_str = "http://localhost:8090"; var domain_str = ""; var page_str = "top_users.json"; // 
jsonp callback required var twitter_addr = "http://api.twitter.com/1/users/lookup.json" var current_results = []; var user_profiles = {}; function update_domain(form) { domain_str = form.inputbox.value; get_top_users(); } function refresh() { get_top_users(); reloadStylesheets(); render_page(); } // Start the rendering of the UI google.setOnLoadCallback(function() { get_top_users(); }); function get_top_users() { jQuery.getJSON(domain_str + page_str, get_user_profiles).error(function() { console.log("Unable to access " + domain_str + " will try again."); }); // .complete(function() { // setTimeout(get_top_users, update_interval); // }); } function get_user_profiles(data) { // save the original results current_results = data; // compute the union of all the _missing_ profiles jQuery.each(current_results, function(i, list) { console.log(list.name); jQuery.each(list.values, function(i, pair) { var id = pair[0]; if(user_profiles[id] == undefined) { user_profiles[id] = { queried: false, is_set: false, profile: {} }; } }); }); var id_list = ""; var id_list_len = 0; // Grab all _missing_ profiles jQuery.each(user_profiles, function(id, obj) { console.log(id); if(!user_profiles[id].queried) { console.log("Requesting: " + id); user_profiles[id].queried = true; id_list += id; id_list_len++; if(id_list_len >= 99) { jQuery.getJSON(twitter_addr + "?callback=?", {user_id: id_list}, process_ids); id = ""; id_list_len = 0; } else { id_list += ","; } } }); if(id_list_len > 0) { jQuery.getJSON(twitter_addr + "?callback=?", {user_id: id_list}, process_ids); id = ""; id_list_len = 0; } } // end of get user profiles function process_ids(data) { jQuery.each(data, function(i, profile) { var id = profile.id; user_profiles[id].is_set = true; user_profiles[id].profile = profile; });; render_page(); } function render_page() { var container = $("#results"); container.empty(); // compute the union of all the profiles jQuery.each(current_results, function(i, list) { console.log("Creating div 
for: " + list.name); var div_str = "
" + "
" + list.label + "
" + "
" jQuery.each(list.values, function(i, pair) { var id = pair[0]; var count = pair[1]; if(user_profiles[id].is_set) { var profile = user_profiles[id].profile; div_str += "
" + "" + "
" + "" + "
" + count + "
" + "
" + "
"; } }); div_str += "
"; container.append(div_str); }); } function reloadStylesheets() { var queryString = '?reload=' + new Date().getTime(); $('link[rel="stylesheet"]').each(function () { this.href = this.href.replace(/\?.*|$/, queryString); }); } ================================================ FILE: toolkits/graph_analytics/kcore.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include /** * * In this program we implement the "k-core" decomposition algorithm. * We use a parallel variant of * * V. Batagelj and M. Zaversnik, An O(m) algorithm for cores * decomposition of networks, * * - Essentially, recursively remove everything with degree 1 * - Then recursively remove everything with degree 2 * - etc. */ /* * Each vertex maintains a "degree" count. If this value * is 0, the vertex is "deleted" */ typedef int vertex_data_type; /* * Don't need any edges */ typedef graphlab::empty edge_data_type; /* * Define the type of the graph */ typedef graphlab::distributed_graph graph_type; // The current K to compute size_t CURRENT_K; /* * The core K-core implementation. * The basic concept is simple. * Each vertex maintains a count of the number of adjacent edges. * If a vertex receives a message, the message contains the number of * adjacent edges deleted. 
The vertex then updates its counter. * If the counter falls below K, it deletes itself * (set the adjacent count to 0) and signals each of its neighbors * with a message of 1. */ class k_core : public graphlab::ivertex_program, // messages are integral public graphlab::IS_POD_TYPE { public: // the last received message int msg; /* Each vertex can only signal once. I set this flag * if it is the first time this vertex falls below K, so I can * initiate scattering */ bool just_deleted; k_core():msg(0),just_deleted(false) { } /* The message contains the number of adjacent edges deleted. * Store the message in the program, and reset the just_deleted flag */ void init(icontext_type& context, const vertex_type& vertex, const message_type& message) { msg = message; just_deleted = false; } // gather is never invoked edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } /* On apply, if the vertex has not yet been deleted, * decrement the counter on the vertex. * If the adjacency count of the vertex falls below K, * the vertex shall be deleted. * We set the vertex data to 0 to designate that it is deleted * and Set the just_deleted flag to signal the neighbors in scatter */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& unused) { if (vertex.data() > 0) { vertex.data() -= msg; if (vertex.data() < CURRENT_K) { just_deleted = true; vertex.data() = 0; } } } /* * If the vertex is deleted, we signal all neighbors on the scatter */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return just_deleted ? graphlab::ALL_EDGES : graphlab::NO_EDGES; } /* * For each neighboring vertex, if it is not yet deleted, * signal it. */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { vertex_type other = edge.source().id() == vertex.id() ? 
edge.target() : edge.source(); if (other.data() > 0) { context.signal(other, 1); } } }; // type of the synchronous_engine typedef graphlab::synchronous_engine engine_type; /* * Called before any graph operation is performed. * Initializes all vertex data to the number of adjacent edges. * Can be called from a graph.transform_vertices() */ void initialize_vertex_values(graph_type::vertex_type& v) { v.data() = v.num_in_edges() + v.num_out_edges(); } /* * Signals all non-deleted vertices with degree less than K. * Can be called from an engine.map_reduce_vertices() * We return empty since no reduction is performed. Only the map. */ graphlab::empty signal_vertices_at_k(engine_type::icontext_type& ctx, const graph_type::vertex_type& vertex) { if (vertex.data() > 0 && vertex.data() < CURRENT_K) { ctx.signal(vertex, 0); } return graphlab::empty(); } /* * Counts the number of un-deleted vertices. */ size_t count_active_vertices(const graph_type::vertex_type& vertex) { return vertex.data() > 0; } /* * Counts the degree of each un-deleted vertex. Half of this * will be the size of the K-core graph. */ size_t double_count_active_edges(const graph_type::vertex_type& vertex) { return (size_t) vertex.data(); } /* * Saves the graph in a tsv format with the condition that * the adjacent vertices have not yet been deleted. * This allows saving of the k-core graph. */ struct save_core_at_k { std::string save_vertex(graph_type::vertex_type) { return ""; } std::string save_edge(graph_type::edge_type e) { if (e.source().data() > 0 && e.target().data() > 0) { return graphlab::tostr(e.source().id()) + "\t" + graphlab::tostr(e.target().id()) + "\n"; } else return ""; } }; int main(int argc, char** argv) { std::cout << "Computes a k-core decomposition of a graph.\n\n"; graphlab::command_line_options clopts ("K-Core decomposition. This program " "computes the K-Core decomposition of a graph, for K ranging from [kmin] " "to [kmax]. The size of the remaining K-core graph at each K is printed. 
" "The [savecores] allow the saving of each K-Core graph in a TSV format" ); std::string prefix, format; size_t kmin = 0; size_t kmax = (size_t)(-1); std::string savecores; clopts.attach_option("graph", prefix, "Graph input. reads all graphs matching prefix*"); clopts.attach_option("format", format, "The graph format"); clopts.attach_option("kmin", kmin, "Compute the k-Core for k the range [kmin,kmax]"); clopts.attach_option("kmax", kmax, "Compute the k-Core for k the range [kmin,kmax]"); clopts.attach_option("savecores", savecores, "If non-empty, will save tsv of each core with prefix [savecores].K."); if(!clopts.parse(argc, argv)) return EXIT_FAILURE; if (prefix == "") { std::cout << "--graph is not optional\n"; clopts.print_description(); return EXIT_FAILURE; } else if (format == "") { std::cout << "--format is not optional\n"; clopts.print_description(); return EXIT_FAILURE; } else if (kmax < kmin) { std::cout << "kmax must be at least as large as kmin\n"; clopts.print_description(); return EXIT_FAILURE; } // Initialize control plane using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; // load graph graph_type graph(dc, clopts); graph.load_format(prefix, format); graph.finalize(); dc.cout() << "Number of vertices: " << graph.num_vertices() << std::endl << "Number of edges: " << graph.num_edges() << std::endl; graphlab::timer ti; graphlab::synchronous_engine engine(dc, graph, clopts); // initialize the vertex data with the degree graph.transform_vertices(initialize_vertex_values); // for each K value for (CURRENT_K = kmin; CURRENT_K <= kmax; CURRENT_K++) { // signal all vertices with degree less than K engine.map_reduce_vertices(signal_vertices_at_k); // recursively delete all vertices with degree less than K engine.start(); // count the number of vertices and edges remaining size_t numv = graph.map_reduce_vertices(count_active_vertices); size_t nume = graph.map_reduce_vertices(double_count_active_edges) / 2; if (numv == 0) break; 
// Output the size of the graph dc.cout() << "K=" << CURRENT_K << ": #V = " << numv << " #E = " << nume << std::endl; // Saves the result if requested if (savecores != "") { graph.save(savecores + "." + graphlab::tostr(CURRENT_K) + ".", save_core_at_k(), false, /* no compression */ false, /* do not save vertex */ true, /* save edge */ clopts.get_ncpus()); /* one file per machine */ } } graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main ================================================ FILE: toolkits/graph_analytics/pagerank.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include // #include // Global random reset probability double RESET_PROB = 0.15; double TOLERANCE = 1.0E-2; size_t ITERATIONS = 0; bool USE_DELTA_CACHE = false; // The vertex data is just the pagerank value (a double) typedef double vertex_data_type; // There is no edge data in the pagerank application typedef graphlab::empty edge_data_type; // The graph type is determined by the vertex and edge data types typedef graphlab::distributed_graph graph_type; /* * A simple function used by graph.transform_vertices(init_vertex); * to initialize the vertes data. 
*/ void init_vertex(graph_type::vertex_type& vertex) { vertex.data() = 1; } /* * The factorized page rank update function extends ivertex_program * specifying the: * * 1) graph_type * 2) gather_type: double (returned by the gather function). Note * that the gather type is not strictly needed here since it is * assumed to be the same as the vertex_data_type unless * otherwise specified * * In addition ivertex program also takes a message type which is * assumed to be empty. Since we do not need messages no message type * is provided. * * pagerank also extends graphlab::IS_POD_TYPE (is plain old data type) * which tells graphlab that the pagerank program can be serialized * (converted to a byte stream) by directly reading its in memory * representation. If a vertex program does not exted * graphlab::IS_POD_TYPE it must implement load and save functions. */ class pagerank : public graphlab::ivertex_program { double last_change; public: /** * Gather only in edges. */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::IN_EDGES; } // end of Gather edges /* Gather the weighted rank of the adjacent page */ double gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return (edge.source().data() / edge.source().num_out_edges()); } /* Use the total rank of adjacent pages to update this page */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { const double newval = (1.0 - RESET_PROB) * total + RESET_PROB; last_change = (newval - vertex.data()); vertex.data() = newval; if (ITERATIONS) context.signal(vertex); } /* The scatter edges depend on whether the pagerank has converged */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { // If an iteration counter is set then if (ITERATIONS) return graphlab::NO_EDGES; // In the dynamic case we run scatter on out edges if the we need // to maintain the delta cache or the tolerance is above 
bound. if(USE_DELTA_CACHE || std::fabs(last_change) > TOLERANCE ) { return graphlab::OUT_EDGES; } else { return graphlab::NO_EDGES; } } /* The scatter function just signal adjacent pages */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { if(USE_DELTA_CACHE) { context.post_delta(edge.target(), last_change); } if(last_change > TOLERANCE || last_change < -TOLERANCE) { context.signal(edge.target()); } else { context.signal(edge.target()); //, std::fabs(last_change)); } } void save(graphlab::oarchive& oarc) const { // If we are using iterations as a counter then we do not need to // move the last change in the vertex program along with the // vertex data. if (ITERATIONS == 0) oarc << last_change; } void load(graphlab::iarchive& iarc) { if (ITERATIONS == 0) iarc >> last_change; } }; // end of factorized_pagerank update functor /* * We want to save the final graph so we define a write which will be * used in graph.save("path/prefix", pagerank_writer()) to save the graph. */ struct pagerank_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "\t" << v.data() << "\n"; return strm.str(); } std::string save_edge(graph_type::edge_type e) { return ""; } }; // end of pagerank writer double map_rank(const graph_type::vertex_type& v) { return v.data(); } double pagerank_sum(graph_type::vertex_type v) { return v.data(); } int main(int argc, char** argv) { // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("PageRank algorithm."); std::string graph_dir; std::string format = "adj"; std::string exec_type = "synchronous"; clopts.attach_option("graph", graph_dir, "The graph file. 
If none is provided " "then a toy graph will be created"); clopts.add_positional("graph"); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); clopts.attach_option("tol", TOLERANCE, "The permissible change at convergence."); clopts.attach_option("format", format, "The graph file format"); size_t powerlaw = 0; clopts.attach_option("powerlaw", powerlaw, "Generate a synthetic powerlaw out-degree graph. "); clopts.attach_option("iterations", ITERATIONS, "If set, will force the use of the synchronous engine" "overriding any engine option set by the --engine parameter. " "Runs complete (non-dynamic) PageRank for a fixed " "number of iterations. Also overrides the iterations " "option in the engine"); clopts.attach_option("use_delta", USE_DELTA_CACHE, "Use the delta cache to reduce time in gather."); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant pagerank to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } // Enable gather caching in the engine clopts.get_engine_args().set_option("use_cache", USE_DELTA_CACHE); if (ITERATIONS) { // make sure this is the synchronous engine dc.cout() << "--iterations set. Forcing Synchronous engine, and running " << "for " << ITERATIONS << " iterations." << std::endl; clopts.get_engine_args().set_option("type", "synchronous"); clopts.get_engine_args().set_option("max_iterations", ITERATIONS); clopts.get_engine_args().set_option("sched_allv", true); } // Build the graph ---------------------------------------------------------- graph_type graph(dc, clopts); if(powerlaw > 0) { // make a synthetic graph dc.cout() << "Loading synthetic Powerlaw graph." 
<< std::endl; graph.load_synthetic_powerlaw(powerlaw, false, 2.1, 100000000); } else if (graph_dir.length() > 0) { // Load the graph from a file dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); } else { dc.cout() << "graph or powerlaw option must be specified" << std::endl; clopts.print_description(); return 0; } // must call finalize before querying the graph graph.finalize(); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; // Initialize the vertex data graph.transform_vertices(init_vertex); // Running The Engine ------------------------------------------------------- graphlab::omni_engine engine(dc, graph, exec_type, clopts); engine.signal_all(); engine.start(); const double runtime = engine.elapsed_seconds(); dc.cout() << "Finished Running engine in " << runtime << " seconds." << std::endl; const double total_rank = graph.map_reduce_vertices(map_rank); std::cout << "Total rank: " << total_rank << std::endl; // Save the final graph ----------------------------------------------------- if (saveprefix != "") { graph.save(saveprefix, pagerank_writer(), false, // do not gzip true, // save vertices false); // do not save edges } double totalpr = graph.map_reduce_vertices(pagerank_sum); std::cout << "Totalpr = " << totalpr << "\n"; // Tear-down communication layer and quit ----------------------------------- graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main // We render this entire program in the documentation ================================================ FILE: toolkits/graph_analytics/partitioning.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include #include #include #include //remove assigned options from arguments std::string get_arg_str_without(int argc, char** argv, std::vector remove_opts) { std::stringstream strm; bool skip_next = false; for (int i = 1; i < argc; ++i) { bool skip = false; for (size_t j = 0; j < remove_opts.size(); ++j) { std::string with_equal = remove_opts[j] + "="; if (strncmp(with_equal.c_str(), argv[i], with_equal.size()) == 0) { skip = true; } else if (strncmp(remove_opts[j].c_str(), argv[i], remove_opts[j].size()) == 0) { skip = true; skip_next = true; } } if (skip == false && skip_next == false) { strm << argv[i] << " "; } else if (skip == false && skip_next == true) { skip_next = false; } } return strm.str(); } bool call_graph_laplacian(const std::string& mpi_args, const std::string& filename, const std::string& format, const bool normalized_cut, const bool ratio_cut, const std::string& args) { std::stringstream strm; if(mpi_args.length() > 0) strm << "mpiexec " << mpi_args << " "; strm << "./graph_laplacian "; strm << " --graph=" << filename; strm << " --format=" << format; // strm << " --normalized-cut=" << normalized_cut; // strm << " --ratio-cut=" << ratio_cut; strm << " " << args; std::cout << "CALLING >" << strm.str() << std::endl; int sys_ret = system(strm.str().c_str()); if (sys_ret != 0) { std::cout << "system call fails" << std::endl; return false; } return true; } void make_initial_vector_file(const std::string& filename, const size_t 
num_data){ std::ofstream ofs((filename + ".init").c_str()); for(size_t i=0;i 0) strm << "mpiexec " << mpi_args << " "; strm << svd_dir << "svd " + filename + ".glap"; strm << " --rows=" << num_data+1; strm << " --cols=" << num_data; strm << " --nsv=" << num_clusters; strm << " --nv=" << rank; strm << " --max_iter=4"; strm << " --quiet=1"; strm << " --save_vectors=1"; strm << " --ortho_repeats=3"; //strm << " --id=1"; //strm << " --prediction=" << filename + "."; strm << " --prediction=" << filename; strm << " --initial_vector=" << filename + ".init"; strm << " " << args; std::cout << "CALLING >" << strm.str() << std::endl; int sys_ret = system(strm.str().c_str()); if (sys_ret != 0) { std::cout << "system call fails" << std::endl; return false; } return true; } bool call_eigen_vector_normalization(const std::string& mpi_args, const std::string& filename, const size_t num_clusters, const size_t rank, const size_t num_data, const std::string& args) { std::stringstream strm; if(mpi_args.length() > 0) strm << "mpiexec " << mpi_args << " "; strm << "./eigen_vector_normalization"; strm << " --data=" << filename; strm << " --clusters=" << num_clusters; strm << " --rank=" << rank; strm << " --data-num=" << num_data; strm << " " << args; std::cout << "CALLING >" << strm.str() << std::endl; int sys_ret = system(strm.str().c_str()); if (sys_ret != 0) { std::cout << "system call fails" << std::endl; return false; } return true; } bool call_kmeans(const std::string& mpi_args, const std::string& filename, const std::string& kmeans_dir, const size_t num_clusters, const std::string& args) { //call svd std::stringstream strm; if(mpi_args.length() > 0) strm << "mpiexec " << mpi_args << " "; strm << kmeans_dir << "kmeans "; strm << " --data " << filename << ".compressed"; strm << " --clusters " << num_clusters; strm << " --output-data " << filename << ".result"; strm << " --id=1"; strm << " " << args; std::cout << "CALLING >" << strm.str() << std::endl; int sys_ret = 
system(strm.str().c_str()); if (sys_ret != 0) { std::cout << "system call fails" << std::endl; return false; } return true; } //select good rank int get_lanczos_rank(const size_t num_clusters, const size_t num_data) { size_t rank = 1; if (num_data < 1000) { if (num_clusters + 10 <= num_data) rank = num_clusters + 10; else rank = num_data; } else if (num_data < 10000) { rank = num_clusters + 25; } else if (num_data < 100000) { rank = num_clusters + 50; } else if (num_data < 1000000) { rank = num_clusters + 80; } else { rank = num_clusters + 100; } return rank; // return num_clusters + 1; } int main(int argc, char** argv) { std::cout << "Graph partitioning (normalized cut)\n\n"; std::string graph_dir; std::string format = "adj"; std::string svd_dir = "../collaborative_filtering/"; std::string kmeans_dir = "../clustering/"; std::string mpi_args; size_t num_partitions = 2; bool normalized_cut = true; bool ratio_cut = false; size_t sv = 0; //parse command line graphlab::command_line_options clopts( "Graph partitioning (normalized cut)"); clopts.attach_option("graph", graph_dir, "The graph file. This is not optional. Vertex ids must start from 1 " "and must not skip any numbers."); clopts.attach_option("format", format, "The graph file format. If \"weight\" is set, the program will read " "the data file where each line holds [id1] [id2] [weight]."); clopts.attach_option("partitions", num_partitions, "The number of partitions to create"); clopts.attach_option("svd-dir", svd_dir, "Path to the directory of Graphlab svd"); clopts.attach_option("kmeans-dir", kmeans_dir, "Path to the directory of Graphlab kmeans"); clopts.attach_option("mpi-args", mpi_args, "If set, will execute mipexec with the given arguments. 
" "For example, --mpi-args=\"-n [N machines] --hostfile [host file]\""); clopts.attach_option("sv", sv, "Number of vectors in each iteration in the Lanczos svd."); // clopts.attach_option("normalized-cut", normalized_cut, // "do normalized cut"); // clopts.attach_option("ratio-cut", ratio_cut, // "do ratio cut"); if (!clopts.parse(argc, argv)) return EXIT_FAILURE; if (graph_dir == "") { std::cout << "--graph is not optional\n"; return EXIT_FAILURE; } // if(normalized_cut == true && ratio_cut == true){ // std::cout << "Both normalized-cut and ratio-cut are true. Ratio cut is selected.\n"; // normalized_cut = false; // }else if(normalized_cut == false && ratio_cut == false){ // std::cout << "Both normalized-cut and ratio-cut are false. Ratio cut is selected.\n"; // ratio_cut = true; // } std::vector remove_opts; remove_opts.push_back("--graph"); remove_opts.push_back("--format"); remove_opts.push_back("--svd-dir"); remove_opts.push_back("--kmeans-dir"); remove_opts.push_back("--partitions"); remove_opts.push_back("--mpi-args"); remove_opts.push_back("--sv"); // remove_opts.push_back("--normalized-cut"); // remove_opts.push_back("--ratio-cut"); std::string other_args = get_arg_str_without(argc, argv, remove_opts); //construct graph laplacian if (call_graph_laplacian(mpi_args, graph_dir, format, normalized_cut, ratio_cut, other_args) == false) { return EXIT_FAILURE; } //eigen value decomposition //read number of data size_t num_data = 0; const std::string datanum_filename = graph_dir + ".datanum"; std::ifstream ifs(datanum_filename.c_str()); if (!ifs) { std::cout << "can't read number of data." 
<< std::endl; return false; } ifs >> num_data; //determine the rank of Lanczos method if(sv == 0){ sv = get_lanczos_rank(num_partitions, num_data); }else{ if(sv < num_partitions) sv = num_partitions; } if (call_svd(mpi_args, graph_dir, svd_dir, num_partitions, sv, num_data, other_args) == false) { return EXIT_FAILURE; } if (call_eigen_vector_normalization(mpi_args, graph_dir, num_partitions, sv, num_data, other_args) == false) { return EXIT_FAILURE; } //kmeans if (call_kmeans(mpi_args, graph_dir, kmeans_dir, num_partitions, other_args) == false) { return EXIT_FAILURE; } return EXIT_SUCCESS; } ================================================ FILE: toolkits/graph_analytics/saturation_ordered_coloring.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /* * Graph coloring algorithm, such that vertex programs are scheduled in * order of saturation (number of differently colored adjacent nodes), * in "waves" of execution */ #include #include #include #include typedef graphlab::vertex_id_type color_type; /* * Vertex data: color and degree of node */ typedef struct { int color; int saturation; std::set adj_colors; // serialize void save(graphlab::oarchive& oarc) const { oarc << color << saturation << adj_colors; } // deserialize void load(graphlab::iarchive& iarc) { iarc >> color >> saturation >> adj_colors; } } vertex_data_type; /* * no edge data */ #define UNCOLORED -1 typedef graphlab::empty edge_data_type; bool EDGE_CONSISTENT = false; signed int max_saturation = 0; signed int current_saturation = 0; bool still_uncolored = true; bool next_component = false; unsigned int uncoloredvs = 0; unsigned int uncoloredvs_old = 0; std::set used_colors; std::set sats; /* * This is the gathering type which accumulates an (unordered) set of * all neighboring colors * It is a simple wrapper around a boost::unordered_set with * an operator+= which simply performs a set union. * * This struct can be significantly accelerated for small sets. * Small collections of vertex IDs should not require the overhead * of the unordered_set. */ struct set_union_gather { boost::unordered_set colors; /* * Combining with another collection of vertices. * Union it into the current set. */ set_union_gather& operator+=(const set_union_gather& other) { foreach(graphlab::vertex_id_type othervid, other.colors) { colors.insert(othervid); } return *this; } // serialize void save(graphlab::oarchive& oarc) const { oarc << colors; } // deserialize void load(graphlab::iarchive& iarc) { iarc >> colors; } }; /* * Define the type of the graph */ typedef graphlab::distributed_graph graph_type; /* * On gather, we accumulate a set of all adjacent colors. 
*/ class graph_coloring: public graphlab::ivertex_program, /* I have no data. Just force it to POD */ public graphlab::IS_POD_TYPE { public: // Gather on all edges edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } /* * For each edge, figure out the ID of the "other" vertex * and accumulate a set of the neighborhood vertex IDs. */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { set_union_gather gather; color_type other_color = edge.source().id() == vertex.id() ? edge.target().data().color: edge.source().data().color; gather.colors.insert(other_color); return gather; } /* * the gather result now contains the colors in the neighborhood. * pick a different color and store it */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& neighborhood) { // find the smallest color not described in the neighborhood size_t neighborhoodsize = neighborhood.colors.size(); for (color_type curcolor = 0; curcolor < neighborhoodsize + 1; ++curcolor) { if (neighborhood.colors.count(curcolor) == 0) { used_colors.insert(curcolor); vertex.data().color = curcolor; break; } } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { if (EDGE_CONSISTENT) return graphlab::NO_EDGES; else return graphlab::ALL_EDGES; } /* * For each edge, count the intersection of the neighborhood of the * adjacent vertices. This is the number of triangles this edge is involved * in. */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { // both points have different colors! if (edge.source().data().color == edge.target().data().color) { context.signal(edge.source().id() == vertex.id() ? 
edge.target() : edge.source()); } else if (vertex.id() == edge.target().id()) { edge.source().data().adj_colors.insert(vertex.data().color); edge.source().data().saturation = edge.source().data().adj_colors.size(); } else { edge.target().data().adj_colors.insert(vertex.data().color); edge.target().data().saturation = edge.target().data().adj_colors.size(); } //} } }; void initialize_vertex_values(graph_type::vertex_type& v) { v.data().saturation = 0; v.data().color = UNCOLORED; } void calculate_saturation(graph_type::vertex_type& v) { if (v.data().saturation > 0 && v.data().color == UNCOLORED) { next_component = false; sats.insert(v.data().saturation); if (v.data().saturation > max_saturation) { max_saturation = v.data().saturation; } } else if (v.data().color == UNCOLORED) still_uncolored = true; } /* * A saver which saves a file where each line is a vid / color pair */ struct save_colors{ std::string save_vertex(graph_type::vertex_type v) { return graphlab::tostr(v.id()) + "\t" + graphlab::tostr(v.data().color) + "\n"; } std::string save_edge(graph_type::edge_type e) { return ""; } }; typedef graphlab::async_consistent_engine engine_type; graphlab::empty signal_vertices_at_saturation (engine_type::icontext_type& ctx, const graph_type::vertex_type& vertex) { if (vertex.data().saturation == current_saturation && vertex.data().color == UNCOLORED) { ctx.signal(vertex); } return graphlab::empty(); } graphlab::empty signal_uncolored (engine_type::icontext_type& ctx, const graph_type::vertex_type& vertex) { if (vertex.data().color == UNCOLORED) { ctx.signal(vertex); } return graphlab::empty(); } struct max_deg_vertex_reducer: public graphlab::IS_POD_TYPE { size_t degree; graphlab::vertex_id_type vid; max_deg_vertex_reducer& operator+=(const max_deg_vertex_reducer& other) { if (degree < other.degree) { (*this) = other; } return (*this); } }; max_deg_vertex_reducer find_max_deg_vertex(const graph_type::vertex_type vtx) { //we only want uncolored if (vtx.data().color == 
UNCOLORED) { max_deg_vertex_reducer red; red.degree = vtx.num_out_edges(); red.vid = vtx.id(); return red; } } /**************************************************************************/ /* */ /* Validation Functions */ /* */ /**************************************************************************/ size_t validate_conflict(graph_type::edge_type& edge) { return edge.source().data().color == edge.target().data().color; } int main(int argc, char** argv) { //global_logger().set_log_level(LOG_INFO); // Initialize control plane using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "This program computes a simple graph coloring of a " "provided graph.\n\n"; graphlab::command_line_options clopts("Graph coloring. " "Given a graph, this program computes a graph coloring of the graph." "The Asynchronous engine is used."); std::string prefix, format; std::string output; float alpha = 2.1; size_t powerlaw = 0; clopts.attach_option("graph", prefix, "Graph input. reads all graphs matching prefix*"); clopts.attach_option("format", format, "The graph format"); clopts.attach_option("output", output, "A prefix to save the output."); clopts.attach_option("powerlaw", powerlaw, "Generate a synthetic powerlaw out-degree graph. "); clopts.attach_option("alpha", alpha, "Alpha in powerlaw distrubution"); clopts.attach_option("edgescope", EDGE_CONSISTENT, "Use Locking. "); if(!clopts.parse(argc, argv)) return EXIT_FAILURE; if (prefix.length() == 0 && powerlaw == 0) { clopts.print_description(); return EXIT_FAILURE; } if (output == "") { dc.cout() << "Warning! Output will not be saved\n"; } graphlab::launch_metric_server(); // load graph graph_type graph(dc, clopts); if(powerlaw > 0) { // make a synthetic graph dc.cout() << "Loading synthetic Powerlaw graph." 
<< std::endl; graph.load_synthetic_powerlaw(powerlaw, false, alpha, 100000000); } else { // Load the graph from a file if (prefix == "") { dc.cout() << "--graph is not optional\n"; return EXIT_FAILURE; } else if (format == "") { dc.cout() << "--format is not optional\n"; return EXIT_FAILURE; } graph.load_format(prefix, format); } graph.finalize(); dc.cout() << "Number of vertices: " << graph.num_vertices() << std::endl << "Number of edges: " << graph.num_edges() << std::endl; graphlab::timer ti; dc.cout() << "Initialising vertex data..." <(find_max_deg_vertex); // create engine to count the number of triangles dc.cout() << "Coloring..." << std::endl; if (EDGE_CONSISTENT) { clopts.get_engine_args().set_option("factorized", false); } else { clopts.get_engine_args().set_option("factorized", true); } graphlab::async_consistent_engine engine(dc, graph, clopts); engine.signal(v.vid); engine.start(); //Continue execution until all nodes are colored while (still_uncolored) { still_uncolored = false; next_component = true; graph.transform_vertices(calculate_saturation); for (int x = max_saturation; x > 0; x--) { if (sats.find(x) != sats.end()) { current_saturation = x; engine.map_reduce_vertices(signal_vertices_at_saturation); } } engine.start(); /* *Colors the component with the highest degree and then colors all other components *randomly if the graph has more than one component. */ if (next_component) { dc.cout() << "Colouring other components..." 
<(signal_uncolored); engine.start(); still_uncolored = false; break; } max_saturation = 0; sats.clear(); } size_t conflict_count = graph.map_reduce_edges(validate_conflict); dc.cout() << "Colored in " << ti.current_time() << " seconds" << std::endl; dc.cout() << "Colored using " << used_colors.size() << " colors" << std::endl; dc.cout() << "Num conflicts = " << conflict_count << "\n"; if (output != "") { graph.save(output, save_colors(), false, /* no compression */ true, /* save vertex */ false, /* do not save edge */ 1); /* one file per machine */ } graphlab::stop_metric_server(); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main ================================================ FILE: toolkits/graph_analytics/simple_coloring.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include typedef graphlab::vertex_id_type color_type; /* * no edge data */ typedef graphlab::empty edge_data_type; bool EDGE_CONSISTENT = false; std::set used_colors; /* * This is the gathering type which accumulates an (unordered) set of * all neighboring colors * It is a simple wrapper around a boost::unordered_set with * an operator+= which simply performs a set union. * * This struct can be significantly accelerated for small sets. 
* Small collections of vertex IDs should not require the overhead * of the unordered_set. */ struct set_union_gather { boost::unordered_set colors; /* * Combining with another collection of vertices. * Union it into the current set. */ set_union_gather& operator+=(const set_union_gather& other) { foreach(graphlab::vertex_id_type othervid, other.colors) { colors.insert(othervid); } return *this; } // serialize void save(graphlab::oarchive& oarc) const { oarc << colors; } // deserialize void load(graphlab::iarchive& iarc) { iarc >> colors; } }; /* * Define the type of the graph */ typedef graphlab::distributed_graph graph_type; /* * On gather, we accumulate a set of all adjacent colors. */ class graph_coloring: public graphlab::ivertex_program, /* I have no data. Just force it to POD */ public graphlab::IS_POD_TYPE { public: // Gather on all edges edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } /* * For each edge, figure out the ID of the "other" vertex * and accumulate a set of the neighborhood vertex IDs. */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { set_union_gather gather; color_type other_color = edge.source().id() == vertex.id() ? edge.target().data(): edge.source().data(); // vertex_id_type otherid= edge.source().id() == vertex.id() ? // edge.target().id(): edge.source().id(); gather.colors.insert(other_color); return gather; } /* * the gather result now contains the colors in the neighborhood. 
* pick a different color and store it */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& neighborhood) { // find the smallest color not described in the neighborhood size_t neighborhoodsize = neighborhood.colors.size(); for (color_type curcolor = 0; curcolor < neighborhoodsize + 1; ++curcolor) { if (neighborhood.colors.count(curcolor) == 0) { used_colors.insert(curcolor); vertex.data() = curcolor; break; } } } edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { if (EDGE_CONSISTENT) return graphlab::NO_EDGES; else return graphlab::ALL_EDGES; } /* * For each edge, count the intersection of the neighborhood of the * adjacent vertices. This is the number of triangles this edge is involved * in. */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { // both points have different colors! if (edge.source().data() == edge.target().data()) { context.signal(edge.source().id() == vertex.id() ? edge.target() : edge.source()); } } }; /* * A saver which saves a file where each line is a vid / color pair */ struct save_colors{ std::string save_vertex(graph_type::vertex_type v) { return graphlab::tostr(v.id()) + "\t" + graphlab::tostr(v.data()) + "\n"; } std::string save_edge(graph_type::edge_type e) { return ""; } }; /**************************************************************************/ /* */ /* Validation Functions */ /* */ /**************************************************************************/ size_t validate_conflict(graph_type::edge_type& edge) { return edge.source().data() == edge.target().data(); } int main(int argc, char** argv) { //global_logger().set_log_level(LOG_INFO); // Initialize control plane using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "This program computes a simple graph coloring of a" "provided graph.\n\n"; graphlab::command_line_options clopts("Graph coloring. 
" "Given a graph, this program computes a graph coloring of the graph." "The Asynchronous engine is used."); std::string prefix, format; std::string output; float alpha = 2.1; size_t powerlaw = 0; clopts.attach_option("graph", prefix, "Graph input. reads all graphs matching prefix*"); clopts.attach_option("format", format, "The graph format"); clopts.attach_option("output", output, "A prefix to save the output."); clopts.attach_option("powerlaw", powerlaw, "Generate a synthetic powerlaw out-degree graph. "); clopts.attach_option("alpha", alpha, "Alpha in powerlaw distrubution"); clopts.attach_option("edgescope", EDGE_CONSISTENT, "Use Locking. "); if(!clopts.parse(argc, argv)) return EXIT_FAILURE; if (prefix.length() == 0 && powerlaw == 0) { clopts.print_description(); return EXIT_FAILURE; } if (output == "") { dc.cout() << "Warning! Output will not be saved\n"; } graphlab::launch_metric_server(); // load graph graph_type graph(dc, clopts); if(powerlaw > 0) { // make a synthetic graph dc.cout() << "Loading synthetic Powerlaw graph." << std::endl; graph.load_synthetic_powerlaw(powerlaw, false, alpha, 100000000); } else { // Load the graph from a file if (prefix == "") { dc.cout() << "--graph is not optional\n"; return EXIT_FAILURE; } else if (format == "") { dc.cout() << "--format is not optional\n"; return EXIT_FAILURE; } graph.load_format(prefix, format); } graph.finalize(); dc.cout() << "Number of vertices: " << graph.num_vertices() << std::endl << "Number of edges: " << graph.num_edges() << std::endl; graphlab::timer ti; // create engine to count the number of triangles dc.cout() << "Coloring..." 
<< std::endl; if (EDGE_CONSISTENT) { clopts.get_engine_args().set_option("factorized", false); } else { clopts.get_engine_args().set_option("factorized", true); } graphlab::async_consistent_engine engine(dc, graph, clopts); engine.signal_all(); engine.start(); dc.cout() << "Colored in " << ti.current_time() << " seconds" << std::endl; dc.cout() << "Colored using " << used_colors.size() << " colors" << std::endl; size_t conflict_count = graph.map_reduce_edges(validate_conflict); dc.cout() << "Num conflicts = " << conflict_count << "\n"; if (output != "") { graph.save(output, save_colors(), false, /* no compression */ true, /* save vertex */ false, /* do not save edge */ 1); /* one file per machine */ } graphlab::stop_metric_server(); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main ================================================ FILE: toolkits/graph_analytics/simple_undirected_triangle_count.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include /** * * In this program we implement the "hash-table" version of the * "edge-iterator" algorithm described in * * T. Schank. Algorithmic Aspects of Triangle-Based Network Analysis. * Phd in computer science, University Karlsruhe, 2007. 
* * The procedure is quite straightforward: * - each vertex maintains a list of all of its neighbors in a hash table. * - For each edge (u,v) in the graph, count the number of intersections * of the neighbor set on u and the neighbor set on v. * - We store the size of the intersection on the edge. * * This will count every triangle exactly 3 times. Summing across all the * edges and dividing by 3 gives the desired result. * * The preprocessing stage take O(|E|) time, and it has been shown that this * algorithm takes $O(|E|^(3/2))$ time. * * If we only require total counts, we can introduce a optimization that is * similar to the "forward" algorithm * described in thesis above. Instead of maintaining a complete list of all * neighbors, each vertex only maintains a list of all neighbors with * ID greater than itself. This implicitly generates a topological sort * of the graph. * * Then you can see that each triangle * * \verbatim A----->C | ^ | / v / B * \endverbatim * Must be counted only once. (Only when processing edge AB, can one * observe that A and B have intersecting out-neighbor sets). * * * \note The implementation here is built to be easy to understand * and not necessarily optimal. In particular the unordered_set is slow * for small number of entries. There is a much more efficient * (and substantially more complicated) version in undirected_triangle_count.cpp */ /* * Each vertex maintains a list of all its neighbors. * and a final count for the number of triangles it is involved in */ struct vertex_data_type { vertex_data_type():num_triangles(0) { } // A list of all its neighbors boost::unordered_set vid_set; // The number of triangles this vertex is involved it. 
// only used if "per vertex counting" is used size_t num_triangles; void save(graphlab::oarchive &oarc) const { oarc << vid_set << num_triangles; } void load(graphlab::iarchive &iarc) { iarc >> vid_set >> num_triangles; } }; /* * Each edge is simply a counter of triangles */ typedef size_t edge_data_type; // To collect the set of neighbors, we need a message type which is // basically a set of vertex IDs bool PER_VERTEX_COUNT = false; /* * This is the gathering type which accumulates an (unordered) set of * all neighboring vertices. * It is a simple wrapper around a boost::unordered_set with * an operator+= which simply performs a set union. * * This struct can be significantly accelerated for small sets. * Small collections of vertex IDs should not require the overhead * of the unordered_set. */ struct set_union_gather { boost::unordered_set vid_set; /* * Combining with another collection of vertices. * Union it into the current set. */ set_union_gather& operator+=(const set_union_gather& other) { foreach(graphlab::vertex_id_type othervid, other.vid_set) { vid_set.insert(othervid); } return *this; } // serialize void save(graphlab::oarchive& oarc) const { oarc << vid_set; } // deserialize void load(graphlab::iarchive& iarc) { iarc >> vid_set; } }; /* * Define the type of the graph */ typedef graphlab::distributed_graph graph_type; /* * This class implements the triangle counting algorithm as described in * the header. On gather, we accumulate a set of all adjacent vertices. * If per_vertex output is not necessary, we can use the optimization * where each vertex only accumulates neighbors with greater vertex IDs. */ class triangle_count : public graphlab::ivertex_program, /* I have no data. 
Just force it to POD */ public graphlab::IS_POD_TYPE { public: // Gather on all edges edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } /* * For each edge, figure out the ID of the "other" vertex * and accumulate a set of the neighborhood vertex IDs. */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { set_union_gather gather; // Insert the opposite end of the edge IF the opposite end has // ID greater than the current vertex // If we are getting per vertex counts, we need the entire neighborhood vertex_id_type otherid = edge.source().id() == vertex.id() ? edge.target().id() : edge.source().id(); if (PER_VERTEX_COUNT || otherid > vertex.id()) gather.vid_set.insert(otherid); return gather; } /* * the gather result now contains the vertex IDs in the neighborhood. * store it on the vertex. */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& neighborhood) { vertex.data().vid_set = neighborhood.vid_set; } // end of apply /* * Scatter over all edges to compute the intersection. * I only need to touch each edge once, so if I scatter just on the * out edges, that is sufficient. */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::OUT_EDGES; } /* * Computes the size of the intersection of two unordered sets */ static size_t count_set_intersect( const boost::unordered_set& smaller_set, const boost::unordered_set& larger_set) { size_t count = 0; foreach(vertex_id_type vid, smaller_set) { count += larger_set.count(vid); } return count; } /* * For each edge, count the intersection of the neighborhood of the * adjacent vertices. This is the number of triangles this edge is involved * in. 
*/ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_data_type& srclist = edge.source().data(); const vertex_data_type& targetlist = edge.target().data(); if (srclist.vid_set.size() >= targetlist.vid_set.size()) { edge.data() = count_set_intersect(targetlist.vid_set, srclist.vid_set); } else { edge.data() = count_set_intersect(srclist.vid_set, targetlist.vid_set); } } }; /* * This class is used in a second engine call if per vertex counts are needed. * The number of triangles a vertex is involved in can be computed easily * by summing over the number of triangles each adjacent edge is involved in * and dividing by 2. */ class get_per_vertex_count : public graphlab::ivertex_program, /* I have no data. Just force it to POD */ public graphlab::IS_POD_TYPE { public: // Gather on all edges edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } // We gather the number of triangles each edge is involved in size_t gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return edge.data(); } /* the gather result is the total sum of the number of triangles * each adjacent edge is involved in . Dividing by 2 gives the * desired result. 
*/ void apply(icontext_type& context, vertex_type& vertex, const gather_type& num_triangles) { vertex.data().num_triangles = num_triangles / 2; } // No scatter edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; /* Used to sum over all the edges in the graph in a * map_reduce_edges call * to get the total number of triangles */ size_t get_edge_data(const graph_type::edge_type& e) { return e.data(); } /* * A saver which saves a file where each line is a vid / # triangles pair */ struct save_triangle_count{ std::string save_vertex(graph_type::vertex_type v) { return graphlab::tostr(v.id()) + "\t" + graphlab::tostr(v.data().num_triangles) + "\n"; } std::string save_edge(graph_type::edge_type e) { return ""; } }; int main(int argc, char** argv) { std::cout << "This program counts the exact number of triangles in the " "provided graph.\n\n"; graphlab::command_line_options clopts("Exact Triangle Counting. " "Given a graph, this program computes the total number of triangles " "in the graph. An option (per_vertex) is also provided which " "computes for each vertex, the number of triangles it is involved in." "The algorithm assumes that each undirected edge appears exactly once " "in the graph input. If edges may appear more than once, this procedure " "will over count."); std::string prefix, format; std::string per_vertex; clopts.attach_option("graph", prefix, "Graph input. reads all graphs matching prefix*"); clopts.attach_option("format", format, "The graph format"); clopts.attach_option("per_vertex", per_vertex, "If not empty, will count the number of " "triangles each vertex belongs to and " "save to file with prefix \"[per_vertex]\". 
" "The algorithm used is slightly different " "and thus will be a little slower"); if(!clopts.parse(argc, argv)) return EXIT_FAILURE; if (prefix == "") { std::cout << "--graph is not optional\n"; clopts.print_description(); return EXIT_FAILURE; } else if (format == "") { std::cout << "--format is not optional\n"; clopts.print_description(); return EXIT_FAILURE; } if (per_vertex != "") PER_VERTEX_COUNT = true; // Initialize control plane using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; graphlab::launch_metric_server(); // load graph graph_type graph(dc, clopts); graph.load_format(prefix, format); graph.finalize(); dc.cout() << "Number of vertices: " << graph.num_vertices() << std::endl << "Number of edges: " << graph.num_edges() << std::endl; graphlab::timer ti; // create engine to count the number of triangles dc.cout() << "Counting Triangles..." << std::endl; graphlab::synchronous_engine engine(dc, graph, clopts); engine.signal_all(); engine.start(); dc.cout() << "Counted in " << ti.current_time() << " seconds" << std::endl; if (PER_VERTEX_COUNT == false) { size_t count = graph.map_reduce_edges(get_edge_data); dc.cout() << count << " Triangles" << std::endl; } else { graphlab::synchronous_engine engine(dc, graph, clopts); engine.signal_all(); engine.start(); graph.save(per_vertex, save_triangle_count(), false, /* no compression */ true, /* save vertex */ false, /* do not save edge */ 1); /* one file per machine */ } graphlab::stop_metric_server(); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main ================================================ FILE: toolkits/graph_analytics/sssp.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include /** * \brief The type used to measure distances in the graph. */ typedef float distance_type; /** * \brief The current distance of the vertex. */ struct vertex_data : graphlab::IS_POD_TYPE { distance_type dist; vertex_data(distance_type dist = std::numeric_limits::max()) : dist(dist) { } }; // end of vertex data /** * \brief The distance associated with the edge. */ struct edge_data : graphlab::IS_POD_TYPE { distance_type dist; edge_data(distance_type dist = 1) : dist(dist) { } }; // end of edge data /** * \brief The graph type encodes the distances between vertices and * edges */ typedef graphlab::distributed_graph graph_type; /** * \brief Get the other vertex in the edge. */ inline graph_type::vertex_type get_other_vertex(const graph_type::edge_type& edge, const graph_type::vertex_type& vertex) { return vertex.id() == edge.source().id()? edge.target() : edge.source(); } /** * \brief Use directed or undireced edges. */ bool DIRECTED_SSSP = false; /** * \brief This class is used as the gather type. */ struct min_distance_type : graphlab::IS_POD_TYPE { distance_type dist; min_distance_type(distance_type dist = std::numeric_limits::max()) : dist(dist) { } min_distance_type& operator+=(const min_distance_type& other) { dist = std::min(dist, other.dist); return *this; } }; /** * \brief The single source shortest path vertex program. 
*/ class sssp : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { distance_type min_dist; bool changed; public: void init(icontext_type& context, const vertex_type& vertex, const min_distance_type& msg) { min_dist = msg.dist; } /** * \brief We use the messaging model to compute the SSSP update */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; }; // end of gather_edges // /** // * \brief Collect the distance to the neighbor // */ // min_distance_type gather(icontext_type& context, const vertex_type& vertex, // edge_type& edge) const { // return min_distance_type(edge.data() + // get_other_vertex(edge, vertex).data()); // } // end of gather function /** * \brief If the distance is smaller then update */ void apply(icontext_type& context, vertex_type& vertex, const graphlab::empty& empty) { changed = false; if(vertex.data().dist > min_dist) { changed = true; vertex.data().dist = min_dist; } } /** * \brief Determine if SSSP should run on all edges or just in edges */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { if(changed) return DIRECTED_SSSP? graphlab::OUT_EDGES : graphlab::ALL_EDGES; else return graphlab::NO_EDGES; }; // end of scatter_edges /** * \brief The scatter function just signal adjacent pages */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_type other = get_other_vertex(edge, vertex); distance_type newd = vertex.data().dist + edge.data().dist; if (other.data().dist > newd) { const min_distance_type msg(newd); context.signal(other, msg); } } // end of scatter }; // end of shortest path vertex program /** * \brief We want to save the final graph so we define a write which will be * used in graph.save("path/prefix", pagerank_writer()) to save the graph. 
*/ struct shortest_path_writer { std::string save_vertex(const graph_type::vertex_type& vtx) { std::stringstream strm; strm << vtx.id() << "\t" << vtx.data().dist << "\n"; return strm.str(); } std::string save_edge(graph_type::edge_type e) { return ""; } }; // end of shortest_path_writer struct max_deg_vertex_reducer: public graphlab::IS_POD_TYPE { size_t degree; graphlab::vertex_id_type vid; max_deg_vertex_reducer& operator+=(const max_deg_vertex_reducer& other) { if (degree < other.degree) { (*this) = other; } return (*this); } }; max_deg_vertex_reducer find_max_deg_vertex(const graph_type::vertex_type vtx) { max_deg_vertex_reducer red; red.degree = vtx.num_in_edges() + vtx.num_out_edges(); red.vid = vtx.id(); return red; } int main(int argc, char** argv) { // Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- graphlab::command_line_options clopts("Single Source Shortest Path Algorithm."); std::string graph_dir; std::string format = "adj"; std::string exec_type = "synchronous"; size_t powerlaw = 0; std::vector sources; bool max_degree_source = false; clopts.attach_option("graph", graph_dir, "The graph file. If none is provided " "then a toy graph will be created"); clopts.add_positional("graph"); clopts.attach_option("format", format, "graph format"); clopts.attach_option("source", sources, "The source vertices"); clopts.attach_option("max_degree_source", max_degree_source, "Add the vertex with maximum degree as a source"); clopts.add_positional("source"); clopts.attach_option("directed", DIRECTED_SSSP, "Treat edges as directed."); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); clopts.attach_option("powerlaw", powerlaw, "Generate a synthetic powerlaw out-degree graph. 
"); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant pagerank to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc, clopts); if(powerlaw > 0) { // make a synthetic graph dc.cout() << "Loading synthetic Powerlaw graph." << std::endl; graph.load_synthetic_powerlaw(powerlaw, false, 2, 100000000); } else if (graph_dir.length() > 0) { // Load the graph from a file dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); } else { dc.cout() << "graph or powerlaw option must be specified" << std::endl; clopts.print_description(); return EXIT_FAILURE; } // must call finalize before querying the graph graph.finalize(); dc.cout() << "#vertices: " << graph.num_vertices() << std::endl << "#edges: " << graph.num_edges() << std::endl; if(sources.empty()) { if (max_degree_source == false) { dc.cout() << "No source vertex provided. Adding vertex 0 as source" << std::endl; sources.push_back(0); } } if (max_degree_source) { max_deg_vertex_reducer v = graph.map_reduce_vertices(find_max_deg_vertex); dc.cout() << "No source vertex provided. Using highest degree vertex " << v.vid << " as source." << std::endl; sources.push_back(v.vid); } // Running The Engine ------------------------------------------------------- graphlab::omni_engine engine(dc, graph, exec_type, clopts); // Signal all the vertices in the source set for(size_t i = 0; i < sources.size(); ++i) { engine.signal(sources[i], min_distance_type(0)); } engine.start(); const float runtime = engine.elapsed_seconds(); dc.cout() << "Finished Running engine in " << runtime << " seconds." 
<< std::endl; // Save the final graph ----------------------------------------------------- if (saveprefix != "") { graph.save(saveprefix, shortest_path_writer(), false, // do not gzip true, // save vertices false); // do not save edges } // Tear-down communication layer and quit ----------------------------------- graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main // We render this entire program in the documentation ================================================ FILE: toolkits/graph_analytics/undirected_triangle_count.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include /** * * In this program we implement the "hash-set" version of the * "edge-iterator" algorithm described in * * T. Schank. Algorithmic Aspects of Triangle-Based Network Analysis. * Phd in computer science, University Karlsruhe, 2007. * * The procedure is quite straightforward: * - each vertex maintains a list of all of its neighbors in a hash set. * - For each edge (u,v) in the graph, count the number of intersections * of the neighbor set on u and the neighbor set on v. * - We store the size of the intersection on the edge. * * This will count every triangle exactly 3 times. 
Summing across all the * edges and dividing by 3 gives the desired result. * * The preprocessing stage take O(|E|) time, and it has been shown that this * algorithm takes $O(|E|^(3/2))$ time. * * If we only require total counts, we can introduce a optimization that is * similar to the "forward" algorithm * described in thesis above. Instead of maintaining a complete list of all * neighbors, each vertex only maintains a list of all neighbors with * ID greater than itself. This implicitly generates a topological sort * of the graph. * * Then you can see that each triangle * * \verbatim A----->C | ^ | / v / B * \endverbatim * Must be counted only once. (Only when processing edge AB, can one * observe that A and B have intersecting out-neighbor sets). */ // Radix sort implementation from https://github.com/gorset/radix // Thanks to Erik Gorset // /* Copyright 2011 Erik Gorset. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY Erik Gorset ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL Erik Gorset OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. The views and conclusions contained in the software and documentation are those of the authors and should not be interpreted as representing official policies, either expressed or implied, of Erik Gorset. */ void radix_sort(graphlab::vertex_id_type *array, int offset, int end, int shift) { int x, y; graphlab::vertex_id_type value, temp; int last[256] = { 0 }, pointer[256]; for (x=offset; x> shift) & 0xFF]; } last[0] += offset; pointer[0] = offset; for (x=1; x<256; ++x) { pointer[x] = last[x-1]; last[x] += last[x-1]; } for (x=0; x<256; ++x) { while (pointer[x] != last[x]) { value = array[pointer[x]]; y = (value >> shift) & 0xFF; while (x != y) { temp = array[pointer[y]]; array[pointer[y]++] = value; value = temp; y = (value >> shift) & 0xFF; } array[pointer[x]++] = value; } } if (shift > 0) { shift -= 8; for (x=0; x<256; ++x) { temp = x > 0 ? pointer[x] - pointer[x-1] : pointer[0] - offset; if (temp > 64) { radix_sort(array, pointer[x] - temp, pointer[x], shift); } else if (temp > 1) { std::sort(array + (pointer[x] - temp), array + pointer[x]); //insertion_sort(array, pointer[x] - temp, pointer[x]); } } } } size_t HASH_THRESHOLD = 64; // We on each vertex, either a vector of sorted VIDs // or a hash set (cuckoo hash) of VIDs. // If the number of elements is greater than HASH_THRESHOLD, // the hash set is used. Otherwise the vector is used. 
struct vid_vector{ std::vector vid_vec; graphlab::hopscotch_set *cset; vid_vector(): cset(NULL) { } vid_vector(const vid_vector& v):cset(NULL) { (*this) = v; } vid_vector& operator=(const vid_vector& v) { if (this == &v) return *this; vid_vec = v.vid_vec; if (v.cset != NULL) { // allocate the cuckoo set if the other side is using a cuckoo set // or clear if I alrady have one if (cset == NULL) { cset = new graphlab::hopscotch_set(HASH_THRESHOLD); } else { cset->clear(); } (*cset) = *(v.cset); } else { // if the other side is not using a cuckoo set, lets not use a cuckoo set // either if (cset != NULL) { delete cset; cset = NULL; } } return *this; } ~vid_vector() { if (cset != NULL) delete cset; } // assigns a vector of vertex IDs to this storage. // this function will clear the contents of the vid_vector // and reconstruct it. // If the assigned values has length >= HASH_THRESHOLD, // we will allocate a cuckoo set to store it. Otherwise, // we just store a sorted vector void assign(const std::vector& vec) { clear(); if (vec.size() >= HASH_THRESHOLD) { // move to cset cset = new graphlab::hopscotch_set(HASH_THRESHOLD); foreach (graphlab::vertex_id_type v, vec) { cset->insert(v); } } else { vid_vec = vec; if (vid_vec.size() > 64) { radix_sort(&(vid_vec[0]), 0, vid_vec.size(), 24); } else { std::sort(vid_vec.begin(), vid_vec.end()); } std::vector::iterator new_end = std::unique(vid_vec.begin(), vid_vec.end()); vid_vec.erase(new_end, vid_vec.end()); } } void save(graphlab::oarchive& oarc) const { oarc << (cset != NULL); if (cset == NULL) oarc << vid_vec; else oarc << (*cset); } void clear() { vid_vec.clear(); if (cset != NULL) { delete cset; cset = NULL; } } size_t size() const { return cset == NULL ? 
vid_vec.size() : cset->size(); } void load(graphlab::iarchive& iarc) { clear(); bool hascset; iarc >> hascset; if (!hascset) iarc >> vid_vec; else { cset = new graphlab::hopscotch_set(HASH_THRESHOLD); iarc >> (*cset); } } }; /* A simple counting iterator which can be used as an insert iterator. but only counts the number of elements inserted. Useful for use with counting the size of an intersection using std::set_intersection */ template struct counting_inserter { size_t* i; counting_inserter(size_t* i):i(i) { } counting_inserter& operator++() { ++(*i); return *this; } void operator++(int) { ++(*i); } struct empty_val { empty_val operator=(const T&) { return empty_val(); } }; empty_val operator*() { return empty_val(); } typedef empty_val reference; }; /* * Computes the size of the intersection of two vid_vector's */ static uint32_t count_set_intersect( const vid_vector& smaller_set, const vid_vector& larger_set) { if (smaller_set.cset == NULL && larger_set.cset == NULL) { size_t i = 0; counting_inserter iter(&i); std::set_intersection(smaller_set.vid_vec.begin(), smaller_set.vid_vec.end(), larger_set.vid_vec.begin(), larger_set.vid_vec.end(), iter); return i; } else if (smaller_set.cset == NULL && larger_set.cset != NULL) { size_t i = 0; foreach(graphlab::vertex_id_type vid, smaller_set.vid_vec) { i += larger_set.cset->count(vid); } return i; } else if (smaller_set.cset != NULL && larger_set.cset == NULL) { size_t i = 0; foreach(graphlab::vertex_id_type vid, larger_set.vid_vec) { i += smaller_set.cset->count(vid); } return i; } else { size_t i = 0; foreach(graphlab::vertex_id_type vid, *(smaller_set.cset)) { i += larger_set.cset->count(vid); } return i; } } /* * Each vertex maintains a list of all its neighbors. * and a final count for the number of triangles it is involved in */ struct vertex_data_type { vertex_data_type(): num_triangles(0){ } // A list of all its neighbors vid_vector vid_set; // The number of triangles this vertex is involved it. 
// only used if "per vertex counting" is used uint32_t num_triangles; void save(graphlab::oarchive &oarc) const { oarc << vid_set << num_triangles; } void load(graphlab::iarchive &iarc) { iarc >> vid_set >> num_triangles; } }; /* * Each edge is simply a counter of triangles */ typedef uint32_t edge_data_type; // To collect the set of neighbors, we need a message type which is // basically a set of vertex IDs bool PER_VERTEX_COUNT = false; /* * This is the gathering type which accumulates an array of * all neighboring vertices. * It is a simple wrapper around a vector with * an operator+= which simply performs a += */ struct set_union_gather { graphlab::vertex_id_type v; std::vector vid_vec; set_union_gather():v(-1) { } size_t size() const { if (v == (graphlab::vertex_id_type)-1) return vid_vec.size(); else return 1; } /* * Combining with another collection of vertices. * Union it into the current set. */ set_union_gather& operator+=(const set_union_gather& other) { if (size() == 0) { (*this) = other; return (*this); } else if (other.size() == 0) { return *this; } if (vid_vec.size() == 0) { vid_vec.push_back(v); v = (graphlab::vertex_id_type)(-1); } if (other.vid_vec.size() > 0) { size_t ct = vid_vec.size(); vid_vec.resize(vid_vec.size() + other.vid_vec.size()); for (size_t i = 0; i < other.vid_vec.size(); ++i) { vid_vec[ct + i] = other.vid_vec[i]; } } else if (other.v != (graphlab::vertex_id_type)-1) { vid_vec.push_back(other.v); } return *this; } // serialize void save(graphlab::oarchive& oarc) const { oarc << bool(vid_vec.size() == 0); if (vid_vec.size() == 0) oarc << v; else oarc << vid_vec; } // deserialize void load(graphlab::iarchive& iarc) { bool novvec; v = (graphlab::vertex_id_type)(-1); vid_vec.clear(); iarc >> novvec; if (novvec) iarc >> v; else iarc >> vid_vec; } }; /* * Define the type of the graph */ typedef graphlab::distributed_graph graph_type; /* * This class implements the triangle counting algorithm as described in * the header. 
On gather, we accumulate a set of all adjacent vertices. * If per_vertex output is not necessary, we can use the optimization * where each vertex only accumulates neighbors with greater vertex IDs. */ class triangle_count : public graphlab::ivertex_program, /* I have no data. Just force it to POD */ public graphlab::IS_POD_TYPE { public: bool do_not_scatter; // Gather on all edges edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } /* * For each edge, figure out the ID of the "other" vertex * and accumulate a set of the neighborhood vertex IDs. */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { set_union_gather gather; graphlab::vertex_id_type otherid = edge.target().id() == vertex.id() ? edge.source().id() : edge.target().id(); size_t other_nbrs = (edge.target().id() == vertex.id()) ? (edge.source().num_in_edges() + edge.source().num_out_edges()): (edge.target().num_in_edges() + edge.target().num_out_edges()); size_t my_nbrs = vertex.num_in_edges() + vertex.num_out_edges(); if (PER_VERTEX_COUNT || (other_nbrs > my_nbrs) || (other_nbrs == my_nbrs && otherid > vertex.id())) { //if (PER_VERTEX_COUNT || otherid > vertex.id()) { gather.v = otherid; } return gather; } /* * the gather result now contains the vertex IDs in the neighborhood. * store it on the vertex. */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& neighborhood) { do_not_scatter = false; if (neighborhood.vid_vec.size() == 0) { // neighborhood set may be empty or has only 1 element vertex.data().vid_set.clear(); if (neighborhood.v != (graphlab::vertex_id_type(-1))) { vertex.data().vid_set.vid_vec.push_back(neighborhood.v); } } else { vertex.data().vid_set.assign(neighborhood.vid_vec); } do_not_scatter = vertex.data().vid_set.size() == 0; } // end of apply /* * Scatter over all edges to compute the intersection. 
* I only need to touch each edge once, so if I scatter just on the * out edges, that is sufficient. */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { if (do_not_scatter) return graphlab::NO_EDGES; else return graphlab::OUT_EDGES; } /* * For each edge, count the intersection of the neighborhood of the * adjacent vertices. This is the number of triangles this edge is involved * in. */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { // vertex_type othervtx = edge.target(); const vertex_data_type& srclist = edge.source().data(); const vertex_data_type& targetlist = edge.target().data(); if (targetlist.vid_set.size() < srclist.vid_set.size()) { edge.data() += count_set_intersect(targetlist.vid_set, srclist.vid_set); } else { edge.data() += count_set_intersect(srclist.vid_set, targetlist.vid_set); } } }; /* * This class is used in a second engine call if per vertex counts are needed. * The number of triangles a vertex is involved in can be computed easily * by summing over the number of triangles each adjacent edge is involved in * and dividing by 2. */ class get_per_vertex_count : public graphlab::ivertex_program, /* I have no data. Just force it to POD */ public graphlab::IS_POD_TYPE { public: // Gather on all edges edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } // We gather the number of triangles each edge is involved in size_t gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { return edge.data(); } /* the gather result is the total sum of the number of triangles * each adjacent edge is involved in . Dividing by 2 gives the * desired result. 
*/ void apply(icontext_type& context, vertex_type& vertex, const gather_type& num_triangles) { vertex.data().vid_set.clear(); vertex.data().num_triangles = num_triangles / 2; } // No scatter edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::NO_EDGES; } }; typedef graphlab::synchronous_engine engine_type; /* Used to sum over all the edges in the graph in a * map_reduce_edges call * to get the total number of triangles */ size_t get_edge_data(const graph_type::edge_type& e) { return e.data(); } /* * A saver which saves a file where each line is a vid / # triangles pair */ struct save_triangle_count{ std::string save_vertex(graph_type::vertex_type v) { double nt = v.data().num_triangles; double n_followed = v.num_out_edges(); double n_following = v.num_in_edges(); return graphlab::tostr(v.id()) + "\t" + graphlab::tostr(nt) + "\t" + graphlab::tostr(n_followed) + "\t" + graphlab::tostr(n_following) + "\n"; } std::string save_edge(graph_type::edge_type e) { return ""; } }; int main(int argc, char** argv) { std::cout << "This program counts the exact number of triangles in the " "provided graph.\n\n"; graphlab::command_line_options clopts("Exact Triangle Counting. " "Given a graph, this program computes the total number of triangles " "in the graph. An option (per_vertex) is also provided which " "computes for each vertex, the number of triangles it is involved in." "The algorithm assumes that each undirected edge appears exactly once " "in the graph input. If edges may appear more than once, this procedure " "will over count."); std::string prefix, format; std::string per_vertex; clopts.attach_option("graph", prefix, "Graph input. 
reads all graphs matching prefix*"); clopts.attach_option("format", format, "The graph format"); clopts.attach_option("ht", HASH_THRESHOLD, "Above this size, hash sets are used"); clopts.attach_option("per_vertex", per_vertex, "If not empty, will count the number of " "triangles each vertex belongs to and " "save to file with prefix \"[per_vertex]\". " "The algorithm used is slightly different " "and thus will be a little slower"); if(!clopts.parse(argc, argv)) return EXIT_FAILURE; if (prefix == "") { std::cout << "--graph is not optional\n"; clopts.print_description(); return EXIT_FAILURE; } else if (format == "") { std::cout << "--format is not optional\n"; clopts.print_description(); return EXIT_FAILURE; } if (per_vertex != "") PER_VERTEX_COUNT = true; // Initialize control plane using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; graphlab::launch_metric_server(); // load graph graph_type graph(dc, clopts); graph.load_format(prefix, format); graph.finalize(); dc.cout() << "Number of vertices: " << graph.num_vertices() << std::endl << "Number of edges: " << graph.num_edges() << std::endl; graphlab::timer ti; // create engine to count the number of triangles dc.cout() << "Counting Triangles..." 
<< std::endl; engine_type engine(dc, graph, clopts); engine.signal_all(); engine.start(); dc.cout() << "Counted in " << ti.current_time() << " seconds" << std::endl; if (PER_VERTEX_COUNT == false) { size_t count = graph.map_reduce_edges(get_edge_data); dc.cout() << count << " Triangles" << std::endl; } else { graphlab::synchronous_engine engine(dc, graph, clopts); engine.signal_all(); engine.start(); graph.save(per_vertex, save_triangle_count(), false, /* no compression */ true, /* save vertex */ false, /* do not save edge */ clopts.get_ncpus()); /* one file per machine */ } graphlab::stop_metric_server(); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main ================================================ FILE: toolkits/graph_analytics/warp_bond_percolation.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * \file * * Written by Danny Bickson, CMU * See algorithm description in Wikipedia: http://en.wikipedia.org/wiki/Percolation_theory */ #include #include #include #include #include #include #define BOND_PERCOLATION_MAP_REDUCE 0 #define SYNC bool debug; int max_iter = 100000; std::string output_file; int n = 0; float p = 0; struct vertex_data : public graphlab::IS_POD_TYPE{ unsigned int comp_id; vertex_data(): comp_id(-1) {} }; std::size_t hash_value(vertex_data const& b) { return b.comp_id; } /** * \brief The edge data stores the entry in the matrix. * * In addition the edge data sgdo stores the most recent error estimate. */ struct edge_data : public graphlab::IS_POD_TYPE { unsigned int id; unsigned int comp_id; edge_data(unsigned int id) : id(id) { comp_id = id; }; edge_data(){ id = comp_id = -1; } }; // end of edge data std::size_t hash_value(edge_data const& b) { return b.comp_id; } /** * \brief The graph type is defined in terms of the vertex and edge * data. */ typedef graphlab::distributed_graph graph_type; typedef graphlab::gl3engine engine_type; /** * \brief The graph loader function is a line parser used for * distributed graph construction. 
*/ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { ASSERT_FALSE(line.empty()); // Parse the line std::stringstream strm(line); graph_type::vertex_id_type source_id(-1), target_id(-1); unsigned int edge_id(-1); strm >> source_id >> target_id >> edge_id; if (edge_id == (uint)-1) logstream(LOG_FATAL)<<"Input file must contains line with the following format: [from] [ to] [edge_id]\n " << std::endl << " where edge_id is a consecutive integer " << std::endl; // Create an edge and add it to the graph graph.add_edge(source_id, target_id, edge_data(edge_id)); return true; // successful load } // end of graph_loader size_t count_component(const graph_type::edge_type & edge) { int diff = (edge.source().data().comp_id != edge.target().data().comp_id); if (debug && diff) std::cout<<"Adding diff between node: " << edge.source().id() << " to: " << edge.target().id()<< " compA: " << edge.source().data().comp_id << " compB: " << edge.target().data().comp_id << std::endl; return diff; } unsigned int bond_percolation_map(const graph_type::vertex_type& center, graph_type::edge_type& edge, const graph_type::vertex_type& other) { if (debug) std::cout<<"Comparing: " << center.data().comp_id << " : " << edge.data().id << " : " << other.data().comp_id << std::endl; edge.data().comp_id = std::min(std::min(center.data().comp_id, edge.data().id), other.data().comp_id); if (debug) std::cout<<"Setting edge id to: " << edge.data().comp_id << " from: " << center.id() << " to: " << other.id() << std::endl; return edge.data().comp_id; } //find min component of two edges void bond_percolation_combine(unsigned int& v1, const unsigned int& v2) { v1 = std::min(v1, v2); if (debug) std::cout<<"Comparing two edge ids: " << v1 << " : " << v2 << std::endl; } //the main update function void bond_percolation_function(engine_type::context_type& context, graph_type::vertex_type& vertex #ifndef SYNC , const engine_type::message_type& unused) { #else ){ #endif 
uint comp_id = vertex.data().comp_id; vertex.data().comp_id = context.map_reduce(BOND_PERCOLATION_MAP_REDUCE, graphlab::ALL_EDGES); if (debug && comp_id != vertex.data().comp_id) std::cout<<"node: " << vertex.id() << " min edge component found: " << vertex.data().comp_id << std::endl; #ifndef SYNC if (comp_id != vertex.data().comp_id) context.broadcast_signal(graphlab::ALL_EDGES); #endif } struct model_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { return ""; } std::string save_edge(const edge_type& edge) const { return boost::lexical_cast(edge.data().id) + " " + boost::lexical_cast(edge.data().comp_id) + std::string("\n"); } }; //reduce sizes of connected components struct label_counter { std::map counts; label_counter() { } explicit label_counter(size_t labelId) { counts[labelId] = 1; } label_counter& operator+=(const label_counter& other) { std::map::const_iterator iter = other.counts.begin(); while(iter != other.counts.end()) { counts[iter->first] += iter->second; ++iter; } return *this; } void save(graphlab::oarchive& oarc) const { oarc << counts; } void load(graphlab::iarchive& iarc) { iarc >> counts; } }; label_counter get_comp_ide(const graph_type::edge_type& edge) { return label_counter(edge.data().comp_id); } label_counter get_comp_idv(const graph_type::vertex_type& vertex) { return label_counter(vertex.data().comp_id); } int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "Compute connected component - by edges"; graphlab::command_line_options clopts(description); std::string input_dir; clopts.attach_option("graph", input_dir, "The directory containing the graph file"); clopts.add_positional("graph"); clopts.attach_option("output_file", output_file, "The prefix (folder and 
filename) to save output_file."); clopts.attach_option("max_iter", max_iter, "max number of iterations"); clopts.attach_option("debug", debug, "debug (verbose) mode"); clopts.attach_option("p", p, "percentage for active node (optional)"); clopts.attach_option("n", n, "total number of nodes (optional)"); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." << std::endl; clopts.print_description(); return EXIT_FAILURE; } ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; //dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); dc.cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; //dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); //dc.cout() << "Finalizing graph. Finished in " // << timer.current_time() << std::endl; /*dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; */ if (debug) //omp_set_num_threads(1); /* THE MAIN LOOP */ dc.cout() << "Creating engine" << std::endl; engine_type engine(dc, graph, clopts); #ifndef SYNC engine.set_vertex_program(bond_percolation_function); #endif 
engine.register_map_reduce(BOND_PERCOLATION_MAP_REDUCE, bond_percolation_map, bond_percolation_combine); engine.signal_all(); #ifndef SYNC engine.wait(); #endif #ifdef SYNC /* FOR EACH ITERATION */ for (int i=0; i< max_iter; i++){ /* PERFORM UPDATE FUNCTION */ engine.parfor_all_local_vertices(bond_percolation_function); /* WAIT UNTIL COMPLETION */ engine.wait(); /* CHECK FOR CONVERGENCE */ size_t diff = graph.map_reduce_edges(count_component); dc.cout() << "iter = " << i << " diff= " << diff << std::endl; if (diff == 0) break; } #endif const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime; dc.cout() << " Num updates: " << engine.num_updates() << std::endl; // Make output_file --------------------------------------------------------- if(!output_file.empty()) { std::cout << "Saving output_file" << std::endl; const bool gzip_output = false; const bool save_vertices = false; const bool save_edges = true; const size_t threads_per_machine = 1; //save the output graph.save(output_file, model_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); } //take statistics label_counter state = graph.map_reduce_edges(get_comp_ide); label_counter statv = graph.map_reduce_vertices(get_comp_idv); if (p > 0) dc.cout() << "site fraction p= " << p << std::endl; if (n > 0){ dc.cout() << "n=" << n*p << std::endl; dc.cout() << "isolated sites: " << p*n-graph.num_vertices() << std::endl; } dc.cout() << "Number of sites: " << graph.num_vertices() << std::endl; dc.cout() << "Number of bonds: " << graph.num_edges() << std::endl; if (n){ dc.cout() << "Percentage of sites: " << (double)graph.num_vertices() / n << std::endl; dc.cout() << "Percentage of bonds: " << (double)graph.num_edges() / (2.0*n) << std::endl; } dc.cout() << "SITES RESULT:\nsize\tcount\n"; std::map final_countsv; uint total_sites = 0; for (std::map::const_iterator iter = statv.counts.begin(); 
iter != statv.counts.end(); iter++) { //dc.cout() << iter->first << "\t" << iter->second << "\n"; final_countsv[iter->second] += 1; total_sites += iter->second; } for (std::map::const_iterator iter = final_countsv.begin(); iter != final_countsv.end(); iter++) { dc.cout() << iter->first << "\t" << iter->second << "\n"; } dc.cout() << "BONDS RESULT:\nsize\tcount\n"; std::map final_countse; uint total_bonds = 0; for (std::map::const_iterator iter = state.counts.begin(); iter != state.counts.end(); iter++) { //dc.cout() << iter->first << "\t" << iter->second << "\n"; final_countse[iter->second] += 1; total_bonds += iter->second; } for (std::map::const_iterator iter = final_countse.begin(); iter != final_countse.end(); iter++) { dc.cout() << iter->first << "\t" << iter->second << "\n"; } assert(total_sites == graph.num_vertices()); assert(total_bonds == graph.num_edges()); //shutdown MPI graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/graph_analytics/warp_coloring.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include #include using namespace graphlab; #define UNIQUE_COLOR_MAP_REDUCE 0 #define SIGNAL_IF_CHANGE 1 // The vertex data is the color of the vertex typedef graphlab::vertex_id_type color_type; struct edge_data_type : public graphlab::IS_POD_TYPE { bool dirty; bool locked; char owned_by; // 0 is source, 1 is other bool requested; // 1 is requested by other party; }; std::ostream& operator<<(std::ostream& o, const edge_data_type& e) { o << e.dirty << " " << e.locked << " " << (int)e.owned_by << " " << e.requested; return o; } // The graph type is determined by the vertex and edge data types typedef distributed_graph graph_type; typedef gl3engine engine_type; engine_type* eng; bool EDGE_CONSISTENT = false; /* * This is the gathering type which accumulates an (unordered) set of * all neighboring colors * It is a simple wrapper around a boost::unordered_set with * an operator+= which simply performs a set union. * * This struct can be significantly accelerated for small sets. * Small collections of vertex IDs should not require the overhead * of the unordered_set. */ struct set_union_gather { boost::unordered_set colors; /* * Combining with another collection of vertices. * Union it into the current set. 
*/ set_union_gather& operator+=(const set_union_gather& other) { foreach(graphlab::vertex_id_type othervid, other.colors) { colors.insert(othervid); } return *this; } // serialize void save(graphlab::oarchive& oarc) const { oarc << colors; } // deserialize void load(graphlab::iarchive& iarc) { iarc >> colors; } }; /**************************************************************************/ /* */ /* Coloring Functions */ /* */ /**************************************************************************/ set_union_gather unique_color_map(const graph_type::vertex_type& center, graph_type::edge_type& e, const graph_type::vertex_type& other) { set_union_gather gather; color_type other_color = other.data(); gather.colors.insert(other_color); return gather; } void unique_color_combine(set_union_gather& v1, const set_union_gather& v2) { v1 += v2; } void schedule_neighbors_if_change(const graph_type::vertex_type& center, graph_type::edge_type& e, const graph_type::vertex_type& other) { if (center.data() == other.data()) { eng->get_context().signal(other); } } /**************************************************************************/ /* */ /* Chandy Misra Functions */ /* */ /**************************************************************************/ void initialize_chandy_misra(graph_type::edge_type& edge) { edge.data().dirty = true; edge.data().locked = false; edge.data().owned_by = edge.source().id() < edge.target().id() ? 0 : 1; edge.data().requested = false; // std::cout << edge.source().id() << "->" << edge.target().id() << " " << edge.data() << "\n"; } #define LOCK_IF_OWNED 3 size_t lock_if_owned_map(const graph_type::vertex_type& center, graph_type::edge_type& e, const graph_type::vertex_type& other) { char m = (e.source().id() == center.id()) ? 
0 : 1; if (e.data().owned_by == m) e.data().locked = true; else if (e.data().dirty && e.data().locked == false) { e.data().owned_by = m; e.data().dirty = false; e.data().requested = false; e.data().locked = true; } else { e.data().requested = true; } // std::cout << "Lock If Owned: " << center.id() << ": Fork = "<< e.source().id() << "->" << e.target().id() << " " << e.data() << "\n"; return (e.data().owned_by == m && e.data().locked); } void lock_if_owned_combine(size_t& v1, const size_t& v2) { v1 += v2; } #define STOP_EATING 4 void stop_eating(const graph_type::vertex_type& center, graph_type::edge_type& e, const graph_type::vertex_type& other) { e.data().locked = false; e.data().dirty = true; if (e.data().requested) { // switch owner e.data().owned_by = !e.data().owned_by; e.data().dirty = false; e.data().requested = false; } // std::cout << "Stop Eating: " << center.id() << ": Fork = "<< e.source().id() << "->" << e.target().id() << " " << e.data() << "\n"; } #define UNLOCK_FORKS_MAINTAIN_REQUEST 5 void unlock_forks_maintain_request(const graph_type::vertex_type& center, graph_type::edge_type& e, const graph_type::vertex_type& other) { char m = (e.source().id() == center.id()) ? 
0 : 1; if (e.data().owned_by == m && e.data().locked) { e.data().locked = false; } if (e.data().owned_by == m && e.data().dirty && e.data().requested) { e.data().owned_by = !e.data().owned_by; e.data().dirty = false; e.data().requested = true; } // std::cout << "Release: " << center.id() << ": Fork = " << e.source().id() << "->" << e.target().id() << " " << e.data() << "\n"; } void update_function(engine_type::context_type& context, graph_type::vertex_type& vertex, const engine_type::message_type& unused) { // acquire locks if (EDGE_CONSISTENT) { size_t expected_num_locks = vertex.num_in_edges() + vertex.num_out_edges(); while(1) { size_t numnbr = context.map_reduce(LOCK_IF_OWNED, ALL_EDGES); if (numnbr == expected_num_locks) { break; } else { context.edge_transform(UNLOCK_FORKS_MAINTAIN_REQUEST, ALL_EDGES); } graphlab::fiber_group::yield(); } } set_union_gather neighborhood = context.map_reduce(UNIQUE_COLOR_MAP_REDUCE, ALL_EDGES); bool color_changed = false; size_t neighborhoodsize = neighborhood.colors.size(); for (color_type curcolor = 0; curcolor < neighborhoodsize + 1; ++curcolor) { if (neighborhood.colors.count(curcolor) == 0) { vertex.data() = curcolor; break; } } if (EDGE_CONSISTENT) context.edge_transform(STOP_EATING, ALL_EDGES); context.edge_transform(SIGNAL_IF_CHANGE, ALL_EDGES, false); } /**************************************************************************/ /* */ /* Validation Functions */ /* */ /**************************************************************************/ size_t validate_conflict(graph_type::edge_type& edge) { return edge.source().data() == edge.target().data(); } int main(int argc, char** argv) { // Initialize control plain using mpi mpi_tools::init(argc, argv); distributed_control dc; // Parse command line options ----------------------------------------------- command_line_options clopts("PageRank algorithm."); std::string graph_dir; std::string format = "adj"; clopts.set_scheduler_type("fifo"); clopts.attach_option("graph", 
graph_dir, "The graph file. If none is provided " "then a toy graph will be created"); clopts.add_positional("graph"); clopts.attach_option("format", format, "The graph file format"); clopts.attach_option("edgescope", EDGE_CONSISTENT, "Set to 1 if edge consistency is to be used"); size_t powerlaw = 0; clopts.attach_option("powerlaw", powerlaw, "Generate a synthetic powerlaw out-degree graph. "); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant pagerank to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } graphlab::launch_metric_server(); // Build the graph ---------------------------------------------------------- graph_type graph(dc, clopts); if(powerlaw > 0) { // make a synthetic graph dc.cout() << "Loading synthetic Powerlaw graph." << std::endl; graph.load_synthetic_powerlaw(powerlaw, false, 2.1, 100000000); } else if (graph_dir.length() > 0) { // Load the graph from a file dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); } else { dc.cout() << "graph or powerlaw option must be specified" << std::endl; clopts.print_description(); return 0; } // must call finalize before querying the graph graph.finalize(); graph.transform_edges(initialize_chandy_misra); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; // Running The Engine ------------------------------------------------------- engine_type engine(dc, graph, clopts); eng = &engine; engine.register_map_reduce(UNIQUE_COLOR_MAP_REDUCE, unique_color_map, unique_color_combine); engine.register_edge_transform(SIGNAL_IF_CHANGE, schedule_neighbors_if_change); engine.register_map_reduce(LOCK_IF_OWNED, lock_if_owned_map, lock_if_owned_combine); engine.register_edge_transform(STOP_EATING, stop_eating); 
engine.register_edge_transform(UNLOCK_FORKS_MAINTAIN_REQUEST, unlock_forks_maintain_request); engine.set_vertex_program(update_function); timer ti; ti.start(); engine.signal_all(); engine.wait(); dc.cout() << "Finished Running engine in " << ti.current_time() << " seconds." << std::endl; dc.cout() << engine.num_updates() << " updates." << std::endl; size_t conflict_count = graph.map_reduce_edges(validate_conflict); dc.cout() << "Num conflicts = " << conflict_count << "\n"; graphlab::stop_metric_server(); mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main // We render this entire program in the documentation ================================================ FILE: toolkits/graph_analytics/warp_pagerank.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include // #include using namespace graphlab; #define PAGERANK_MAP_REDUCE 0 // Global random reset probability double RESET_PROB = 0.15; double TOLERANCE = 1E-2; // The vertex data is just the pagerank value (a double) typedef double vertex_data_type; // There is no edge data in the pagerank application typedef empty edge_data_type; // The graph type is determined by the vertex and edge data types typedef distributed_graph graph_type; typedef gl3engine engine_type; /* * A simple function used by graph.transform_vertices(init_vertex); * to initialize the vertes data. */ void init_vertex(graph_type::vertex_type& vertex) { vertex.data() = 1; } /* * We want to save the final graph so we define a write which will be * used in graph.save("path/prefix", pagerank_writer()) to save the graph. */ struct pagerank_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "\t" << v.data() << "\n"; return strm.str(); } std::string save_edge(graph_type::edge_type e) { return ""; } }; // end of pagerank writer double pagerank_map(const graph_type::vertex_type& v) { return v.data() / v.num_out_edges(); } void pagerank_combine(double& v1, const double& v2) { v1 += v2; } void update_function(engine_type::context_type& context, graph_type::vertex_type& vertex, const engine_type::message_type& unused) { double prev = vertex.data(); vertex.data() = 0.15 + 0.85 * context.map_reduce(PAGERANK_MAP_REDUCE, IN_EDGES); double last_change = std::fabs((vertex.data()- prev));// / vertex.num_out_edges()); if (last_change > TOLERANCE) { context.broadcast_signal(OUT_EDGES); } } double pagerank_sum(graph_type::vertex_type v) { return v.data(); } int main(int argc, char** argv) { // Initialize control plain using mpi mpi_tools::init(argc, argv); distributed_control dc; // Parse command line options 
----------------------------------------------- command_line_options clopts("PageRank algorithm."); std::string graph_dir; std::string format = "adj"; clopts.set_scheduler_type("fifo"); clopts.attach_option("graph", graph_dir, "The graph file. If none is provided " "then a toy graph will be created"); clopts.add_positional("graph"); clopts.attach_option("tol", TOLERANCE, "The permissible change at convergence."); clopts.attach_option("format", format, "The graph file format"); size_t powerlaw = 0; clopts.attach_option("powerlaw", powerlaw, "Generate a synthetic powerlaw out-degree graph. "); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant pagerank to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." << std::endl; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graphlab::launch_metric_server(); graph_type graph(dc, clopts); if(powerlaw > 0) { // make a synthetic graph dc.cout() << "Loading synthetic Powerlaw graph." 
<< std::endl; graph.load_synthetic_powerlaw(powerlaw, false, 2.1, 100000000); } else if (graph_dir.length() > 0) { // Load the graph from a file dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); } else { dc.cout() << "graph or powerlaw option must be specified" << std::endl; clopts.print_description(); return 0; } // must call finalize before querying the graph graph.finalize(); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; // Initialize the vertex data graph.transform_vertices(init_vertex); // Running The Engine ------------------------------------------------------- engine_type engine(dc, graph, clopts); engine.register_map_reduce(PAGERANK_MAP_REDUCE, pagerank_map, pagerank_combine); engine.set_vertex_program(update_function); timer ti; ti.start(); engine.signal_all(); engine.wait(); dc.cout() << "Finished Running engine in " << ti.current_time() << " seconds." << std::endl; dc.cout() << engine.num_updates() << " updates." << std::endl; // Save the final graph ----------------------------------------------------- if (saveprefix != "") { graph.save(saveprefix, pagerank_writer(), false, // do not gzip true, // save vertices false); // do not save edges } graphlab::stop_metric_server(); mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main // We render this entire program in the documentation ================================================ FILE: toolkits/graph_analytics/warp_pagerank2.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ #include #include #include #include #include // #include using namespace graphlab; #define PAGERANK_MAP_REDUCE 0 // Global random reset probability float RESET_PROB = 0.15; float TOLERANCE = 1E-2; // The vertex data is just the pagerank value (a float) typedef float vertex_data_type; // There is no edge data in the pagerank application typedef empty edge_data_type; // The graph type is determined by the vertex and edge data types typedef distributed_graph graph_type; typedef gl3engine engine_type; /* * A simple function used by graph.transform_vertices(init_vertex); * to initialize the vertes data. */ void init_vertex(graph_type::vertex_type& vertex) { vertex.data() = 1; } /* * We want to save the final graph so we define a write which will be * used in graph.save("path/prefix", pagerank_writer()) to save the graph. 
*/ struct pagerank_writer { std::string save_vertex(graph_type::vertex_type v) { std::stringstream strm; strm << v.id() << "\t" << v.data() << "\n"; return strm.str(); } std::string save_edge(graph_type::edge_type e) { return ""; } }; // end of pagerank writer float pagerank_map(const graph_type::vertex_type& v) { return v.data() / v.num_out_edges(); } void pagerank_combine(float& v1, const float& v2) { v1 += v2; } void update_function(engine_type::context_type& context, graph_type::vertex_type& vertex) { vertex.data() = 0.15 + 0.85 * context.map_reduce(PAGERANK_MAP_REDUCE, IN_EDGES); } float pagerank_sum(graph_type::vertex_type v) { return v.data(); } int main(int argc, char** argv) { // Initialize control plain using mpi mpi_tools::init(argc, argv); distributed_control dc; global_logger().set_log_level(LOG_INFO); // Parse command line options ----------------------------------------------- command_line_options clopts("PageRank algorithm."); std::string graph_dir; std::string format = "adj"; clopts.set_scheduler_type("fifo"); clopts.attach_option("graph", graph_dir, "The graph file. If none is provided " "then a toy graph will be created"); clopts.add_positional("graph"); clopts.attach_option("tol", TOLERANCE, "The permissible change at convergence."); clopts.attach_option("format", format, "The graph file format"); size_t powerlaw = 0; clopts.attach_option("powerlaw", powerlaw, "Generate a synthetic powerlaw out-degree graph. "); size_t iterations = 10; clopts.attach_option("iterations", iterations, "Number of asynchronous iterations to run"); std::string saveprefix; clopts.attach_option("saveprefix", saveprefix, "If set, will save the resultant pagerank to a " "sequence of files with prefix saveprefix"); if(!clopts.parse(argc, argv)) { dc.cout() << "Error in parsing command line arguments." 
<< std::endl; return EXIT_FAILURE; } // Build the graph ---------------------------------------------------------- graph_type graph(dc, clopts); if(powerlaw > 0) { // make a synthetic graph dc.cout() << "Loading synthetic Powerlaw graph." << std::endl; graph.load_synthetic_powerlaw(powerlaw, false, 2.1, 100000000); } else if (graph_dir.length() > 0) { // Load the graph from a file dc.cout() << "Loading graph in format: "<< format << std::endl; graph.load_format(graph_dir, format); } else { dc.cout() << "graph or powerlaw option must be specified" << std::endl; clopts.print_description(); return 0; } // must call finalize before querying the graph graph.finalize(); dc.cout() << "#vertices: " << graph.num_vertices() << " #edges:" << graph.num_edges() << std::endl; // Initialize the vertex data graph.transform_vertices(init_vertex); // Running The Engine ------------------------------------------------------- engine_type engine(dc, graph, clopts); engine.register_map_reduce(PAGERANK_MAP_REDUCE, pagerank_map, pagerank_combine); timer ti; for (size_t i = 0;i < iterations; ++i) { engine.parfor_all_local_vertices(update_function); std::cout << "Iteration " << i << " complete\n"; engine.wait(); } dc.cout() << "Finished Running engine in " << ti.current_time() << " seconds." << std::endl; dc.cout() << engine.num_updates() << " updates." 
<< std::endl; // Save the final graph ----------------------------------------------------- if (saveprefix != "") { graph.save(saveprefix, pagerank_writer(), false, // do not gzip true, // save vertices false); // do not save edges } mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main // We render this entire program in the documentation ================================================ FILE: toolkits/graphical_models/CMakeLists.txt ================================================ project(GraphicalModels) add_graphlab_executable(dd dd_main.cpp) requires_eigen(dd) add_graphlab_executable(lbp_structured_prediction lbp_structured_prediction.cpp) requires_eigen(lbp_structured_prediction) add_graphlab_executable(profile_lbp_synthetic profile_lbp_synthetic.cpp) requires_eigen(profile_lbp_synthetic) add_graphlab_executable(profile_lbp_synthetic2 profile_lbp_synthetic2.cpp) requires_eigen(profile_lbp_synthetic2) add_graphlab_executable(synthetic_image_data synthetic_image_data.cpp) requires_opencv(synthetic_image_data) add_graphlab_executable(mplp_denoise mplp_denoise.cpp) requires_eigen(mplp_denoise) requires_opencv(mplp_denoise) subdirs(factors) ================================================ FILE: toolkits/graphical_models/ad3_qp.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ /** * * \brief This application performs MAP inference on Markov Nets * provided in standard UAI file format via Dual-Decomposition. * * * \authors Dhruv Batra, André Martins, Aroma Mahendru */ #ifndef _AD3_QP_HPP_ #define _AD3_QP_HPP_ #include #include #include #include "dd_grlab.hpp" #define NEARLY_ZERO_TOL(a,tol) (((a)<=(tol)) && ((a)>=(-(tol)))) #define NEARLY_EQ_TOL(a,b,tol) (((a)-(b))*((a)-(b))<=(tol)) #define num_max_iterations_QP_ 10 #define EXP 10 //////////////////////////////////////////////////////////////////////////////// // This class implements the Alternating Directions Dual Decompostion as // described in: // // André F. T. Martins, Mário A. T. Figueiredo, Pedro M. Q. Aguiar, // Noah A. Smith, and Eric P. Xing. // "Alternating Directions Dual Decomposition" // Arxiv preprint arXiv:1212.6550, 2012. ///////////////////////////////////////////////////////////////////////////////// struct ad3_vertex_program:public admm_vertex_program { /** * \brief Maximize returns the maximum value and configuration with reference to * input additional and variable log potentials. addtional log potential corresponds * factor potentials and variable potential corresponds to sum of lagrange * multipliers and unary potentials divided by degree of the unary vertex. */ void Maximize(vertex_type& vertex, vec additional_log_potentials, vec variable_log_potentials, Configuration &configuration, double *value) { vector states(vertex.data().nvars,-1); *value = -1e12; for (int index = 0; index < additional_log_potentials.size(); ++index) { double score = additional_log_potentials[index]; get_configuration_states(vertex,index, &states); int offset = 0; for (int i = 0; i < vertex.data().nvars; ++i) { score += variable_log_potentials[offset+states[i]]; offset = vertex.data().cards[i]; } if (configuration < 0 || score > *value) { configuration = index; *value = score; } } assert(configuration >= 0); } // Delete configuration. 
void DeleteConfiguration(Configuration *configuration) { Configuration *conf = configuration; delete conf; } void DeleteConfiguration(vector configuration) { for(int i=0; i< configuration.size(); i++){ Configuration *conf = configuration[i]; delete conf;} } /** * \brief InvertAfterInsertion function is used to invert the matrix. Used in solveQP */ bool InvertAfterInsertion(vertex_type& vertex, vector & inverse_A_, const vector &active_set, const Configuration &inserted_element) { vector inverse_A = inverse_A_; int size_A = active_set.size() + 1; vector r(size_A); r[0] = 1.0; for (int i = 0; i < active_set.size(); ++i) { // Count how many variable values the new assignment // have in common with the i-th assignment. int num_common_values = CountCommonValues(vertex, active_set[i], inserted_element); r[i+1] = static_cast(num_common_values); } double r0 = static_cast(CountCommonValues(vertex, inserted_element, inserted_element)); double s = r0; for (int i = 0; i < size_A; ++i) { if (r[i] == 0.0) continue; s -= r[i] * r[i] * inverse_A[i * size_A + i]; for (int j = i+1; j < size_A; ++j) { if (r[j] == 0.0) continue; s -= 2 * r[i] * r[j] * inverse_A[i * size_A + j]; } } if (NEARLY_ZERO_TOL(s, 1e-9)) { if (opts.verbose> 2) { cout << "Warning: updated matrix will become singular after insertion." 
<< endl; } return false; } double invs = 1.0 / s; vector d(size_A, 0.0); for (int i = 0; i < size_A; ++i) { if (r[i] == 0.0) continue; for (int j = 0; j < size_A; ++j) { d[j] += inverse_A[i * size_A + j] * r[i]; } } int size_A_after = size_A + 1; inverse_A_.resize(size_A_after * size_A_after); for (int i = 0; i < size_A; ++i) { for (int j = 0; j < size_A; ++j) { inverse_A_[i * size_A_after + j] = inverse_A[i * size_A + j] + invs * d[i] * d[j]; } inverse_A_[i * size_A_after + size_A] = -invs * d[i]; inverse_A_[size_A * size_A_after + i] = -invs * d[i]; } inverse_A_[size_A * size_A_after + size_A] = invs; return true; } /** * \brief InvertAfterRemoval function is used to invert the matrix. Used in solveQP */ void InvertAfterRemoval(vector &inverse_A_,const vector &active_set, int removed_index) { vector inverse_A = inverse_A_; int size_A = active_set.size() + 1; vector r(size_A); ++removed_index; // Index in A has an offset of 1. double invs = inverse_A[removed_index * size_A + removed_index]; assert(!NEARLY_ZERO_TOL(invs, 1e-12)); double s = 1.0 / invs; vector d(size_A - 1, 0.0); int k = 0; for (int i = 0; i < size_A; ++i) { if (i == removed_index) continue; d[k] = -s * inverse_A[removed_index * size_A + i]; ++k; } int size_A_after = size_A - 1; inverse_A_.resize(size_A_after * size_A_after); k = 0; for (int i = 0; i < size_A; ++i) { if (i == removed_index) continue; int l = 0; for (int j = 0; j < size_A; ++j) { if (j == removed_index) continue; inverse_A_[k * size_A_after + l] = inverse_A[i * size_A + j] - invs * d[k] * d[l]; ++l; } ++k; } } /** * \brief ComputeActiveSetSimilarities computes Mnz'*Mnz. Used in solveQP */ void ComputeActiveSetSimilarities(vertex_type& vertex, const vector &active_set, vector *similarities) { int size = active_set.size(); // Compute similarity matrix. 
similarities->resize(size * size); (*similarities)[0] = 0.0; for (int i = 0; i < active_set.size(); ++i) { (*similarities)[i*size + i] = static_cast( CountCommonValues(vertex,active_set[i], active_set[i]) ); for (int j = i+1; j < active_set.size(); ++j) { // Count how many variable values the i-th and j-th // assignments have in common. int num_common_values = CountCommonValues(vertex,active_set[i], active_set[j]); (*similarities)[i*size + j] = num_common_values; (*similarities)[j*size + i] = num_common_values; } } } /** * \brief ComputeMarginalsFromSparseDistribution computes marginalvalues for unary * factor from given factor distribution. */ void ComputeMarginalsFromSparseDistribution( vertex_type& vertex, const vector &active_set, const vector &distribution, vec &variable_posteriors, vec &additional_posteriors) { variable_posteriors.setZero(); additional_posteriors.setZero(); for (int i = 0; i < active_set.size(); ++i) { UpdateMarginalsFromConfiguration(vertex,active_set[i], distribution[i], variable_posteriors, additional_posteriors); } } // Given a configuration with a probability (weight), // increment the vectors of variable and additional posteriors. void UpdateMarginalsFromConfiguration(vertex_type& vertex, const Configuration &configuration, double weight, vec &variable_posteriors, vec &additional_posteriors) { vector states(vertex.data().nvars, -1); get_configuration_states(vertex, configuration, &states); int offset = 0; for (int k = 0; k < vertex.data().nvars; ++k) { variable_posteriors[offset + states[k]] += weight; offset += vertex.data().cards[k]; } additional_posteriors[configuration] += weight; } // Count how many common values two configurations have. 
int CountCommonValues(vertex_type& vertex,Configuration configuration1, Configuration configuration2) { //assert(states1->size() == states2->size()); int count = 0; vector states1(vertex.data().nvars, -1); vector states2(vertex.data().nvars, -1); get_configuration_states(vertex, configuration1, &states1); get_configuration_states(vertex, configuration2, &states2); for(int i = 0; i< vertex.data().nvars; i++) { if (states1[i] == states2[i]) { count++;} } return count; } /** * \brief Evaluate returns the maximum value with reference to * input additional and variable log potentials and configuration. addtional * log potential corresponds factor potentials and variable potential corresponds * to sum of lagrange * multipliers and unary potentials divided by degree of * the unary vertex. */ void Evaluate(vertex_type& vertex, vec additional_log_potentials, vec variable_log_potentials, const Configuration configuration, double *value) { vector states(vertex.data().nvars, -1); get_configuration_states(vertex, configuration, &states); *value = 0.0; int offset = 0; for (int i = 0;i *similarities, vector *eigenvalues) { int size = sqrt(similarities->size()); Eigen::SelfAdjointEigenSolver es; Eigen::MatrixXd sim(size, size); int t = 0; for (int i = 0; i < size; ++i) { for (int j = 0; j < size; ++j) { sim(i, j) = (*similarities)[t]; ++t; } } es.compute(sim); const Eigen::VectorXd &eigvals = es.eigenvalues(); eigenvalues->resize(size); for (int i = 0; i < size; ++i) { (*eigenvalues)[i] = eigvals[i]; } const Eigen::MatrixXd &eigvectors = es.eigenvectors().transpose(); t = 0; for (int i = 0; i < size; ++i) { for (int j = 0; j < size; ++j) { (*similarities)[t] = eigvectors(i, j); ++t; } } } // Function to solve each quadratic sub problem for dense factors. // It uses active set method. 
Caching is deactivated // TODO: Activate caching feature void SolveQP_dense(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors) { vertex_data& vdata = vertex.data(); vec additional_log_potentials = vdata.potentials; vec variable_log_potentials = total.neighbor_distribution + total.messages; // Initialize the active set. if (vdata.active_set_.size() == 0) { variable_posteriors.resize(variable_log_potentials.size()); additional_posteriors.resize(additional_log_potentials.size()); vdata.distribution_.clear(); // Initialize by solving the LP, discarding the quadratic // term. Configuration configuration = -1; double value; Maximize(vertex, additional_log_potentials, variable_log_potentials, configuration, &value); vdata.active_set_.push_back(configuration); vdata.distribution_.push_back(1.0); // Initialize inv(A) as [-M,1;1,0]. vdata.inverse_A_.resize(4); vdata.inverse_A_[0] = static_cast( -CountCommonValues(vertex,configuration, configuration)); vdata.inverse_A_[1] = 1; vdata.inverse_A_[2] = 1; vdata.inverse_A_[3] = 0; } bool changed_active_set = true; vector z; int num_max_iterations = num_max_iterations_QP_; double tau = 0; for (int iter = 0; iter < num_max_iterations; ++iter) { bool same_as_before = true; bool unbounded = false; if (changed_active_set) { // Recompute vector b. vector b(vdata.active_set_.size() + 1, 0.0); b[0] = 1.0; for (int i = 0; i < vdata.active_set_.size(); ++i) { const Configuration &configuration = vdata.active_set_[i]; double score; Evaluate(vertex, additional_log_potentials, variable_log_potentials, configuration, &score); b[i+1] = score; } // Solve the system Az = b. 
z.resize(vdata.active_set_.size()); int size_A = vdata.active_set_.size() + 1; for (int i = 0; i < vdata.active_set_.size(); ++i) { z[i] = 0.0; for (int j = 0; j < size_A; ++j) { z[i] += vdata.inverse_A_[(i+1) * size_A + j] * b[j]; } } tau = 0.0; for (int j = 0; j < size_A; ++j) { tau += vdata.inverse_A_[j] * b[j]; } same_as_before = false; } if (same_as_before) { // Compute the variable marginals from the full distribution // stored in z. ComputeMarginalsFromSparseDistribution(vertex, vdata.active_set_, z, variable_posteriors, additional_posteriors); // Get the most violated constraint // (by calling the black box that computes the MAP). vec scores = variable_log_potentials; for (int i = 0; i < scores.size(); ++i) { scores[i] -= variable_posteriors[i]; } Configuration configuration = -1; double value = 0.0; Maximize(vertex, additional_log_potentials, scores, configuration, &value); double very_small_threshold = 1e-9; if (value <= tau + very_small_threshold) { // value <= tau. // We have found the solution; // the distribution, active set, and inv(A) are cached for the next round. //DeleteConfiguration(configuration); return; } else { for (int k = 0; k < vdata.active_set_.size(); ++k) { // This is expensive and should just be a sanity check. // However, in practice, numerical issues force an already existing // configuration to try to be added. Therefore, we always check // if a configuration already exists before inserting it. // If it does, that means the active set method converged to a // solution (but numerical issues had prevented us to see it.) if (vdata.active_set_[k] == configuration) { if (opts.verbose > 2) { cout << "Warning: value - tau = " << value - tau << " " << value << " " << tau << endl; } // We have found the solution; // the distribution, active set, and inv(A) // are cached for the next round. //DeleteConfiguration(configuration); // Just in case, clean the cache. // This may prevent eventual numerical problems in the future. 
for (int j = 0; j < vdata.active_set_.size(); ++j) { if (j == k) continue; // This configuration was deleted already. //DeleteConfiguration(active_set_[j]); } vdata.active_set_.clear(); vdata.inverse_A_.clear(); vdata.distribution_.clear(); // Return. return; } } z.push_back(0.0); vdata.distribution_ = z; // Update inv(A). bool singular = !InvertAfterInsertion(vertex, vdata.inverse_A_, vdata.active_set_, configuration); if (singular) { // If adding a new configuration causes the matrix to be singular, // don't just add it. Instead, look for a configuration in the null // space and remove it before inserting the new one. // Right now, if more than one such configuration exists, we just // remove the first one we find. There's a chance this could cause // some cyclic behaviour. If that is the case, we should randomize // this choice. // Note: This step is expensive and requires an eigendecomposition. // TODO: I think there is a graph interpretation for this problem. // Maybe some specialized graph algorithm is cheaper than doing // the eigendecomposition. 
vector similarities(vdata.active_set_.size() * vdata.active_set_.size()); ComputeActiveSetSimilarities(vertex,vdata.active_set_, &similarities); //cout<<"compute active similarities in solveQP .."< padded_similarities((vdata.active_set_.size()+2) * (vdata.active_set_.size()+2), 1.0); for (int i = 0; i < vdata.active_set_.size(); ++i) { for (int j = 0; j < vdata.active_set_.size(); ++j) { padded_similarities[(i+1)*(vdata.active_set_.size()+2) + (j+1)] = similarities[i*vdata.active_set_.size() + j]; } } padded_similarities[0] = 0.0; for (int i = 0; i < vdata.active_set_.size(); ++i) { double value = static_cast( CountCommonValues(vertex, configuration, vdata.active_set_[i])); padded_similarities[(i+1)*(vdata.active_set_.size()+2) + (vdata.active_set_.size()+1)] = value; padded_similarities[(vdata.active_set_.size()+1)*(vdata.active_set_.size()+2) + (i+1)] = value; } double value = static_cast( CountCommonValues(vertex, configuration, configuration)); padded_similarities[(vdata.active_set_.size()+1)*(vdata.active_set_.size()+2) + (vdata.active_set_.size()+1)] = value; vector eigenvalues(vdata.active_set_.size()+2); EigenDecompose(&padded_similarities, &eigenvalues); int zero_eigenvalue = -1; for (int i = 0; i < vdata.active_set_.size()+2; ++i) { if (NEARLY_EQ_TOL(eigenvalues[i], 0.0, 1e-9)) { if (zero_eigenvalue >= 0) { // If this happens, something failed. Maybe a numerical problem // may cause this. In that case, just give up, clean the cache // and return. Hopefully the next iteration will fix it. cout << "Multiple zero eigenvalues: " << eigenvalues[zero_eigenvalue] << " and " << eigenvalues[i] << endl; cout << "Warning: Giving up." << endl; // Clean the cache. 
for (int j = 0; j < vdata.active_set_.size(); ++j) { //DeleteConfiguration(active_set_[j]); } vdata.active_set_.clear(); vdata.inverse_A_.clear(); vdata.distribution_.clear(); return; } zero_eigenvalue = i; } } assert(zero_eigenvalue >= 0); vector configurations_to_remove; for (int j = 1; j < vdata.active_set_.size()+1; ++j) { double value = padded_similarities[zero_eigenvalue*(vdata.active_set_.size()+2) + j]; if (!NEARLY_EQ_TOL(value, 0.0, 1e-9)) { configurations_to_remove.push_back(j-1); } } if (opts.verbose > 2) { cout << "Pick a configuration to remove (" << configurations_to_remove.size() << " out of " << vdata.active_set_.size() << ")." << endl; } assert(configurations_to_remove.size() >= 1); int j = configurations_to_remove[0]; // Update inv(A). InvertAfterRemoval(vdata.inverse_A_, vdata.active_set_, j); // Remove blocking constraint from the active set. //DeleteConfiguration(active_set_[j]); // Delete configutation. vdata.active_set_.erase(vdata.active_set_.begin() + j); singular = !InvertAfterInsertion(vertex, vdata.inverse_A_, vdata.active_set_, configuration); assert(!singular); } // Insert configuration to active set. if (opts.verbose > 2) { cout << "Inserted one element to the active set (iteration " << iter << ")." << endl; } vdata.active_set_.push_back(configuration); changed_active_set = true; } } else { // Solution has changed from the previous iteration. // Look for blocking constraints. int blocking = -1; bool exist_blocking = false; double alpha = 1.0; for (int i = 0; i < vdata.active_set_.size(); ++i) { assert(vdata.distribution_[i] >= -1e-12); if (z[i] >= vdata.distribution_[i]) continue; if (z[i] < 0) exist_blocking = true; double tmp = vdata.distribution_[i] / (vdata.distribution_[i] - z[i]); if (blocking < 0 || tmp < alpha) { alpha = tmp; blocking = i; } } if (!exist_blocking) { // No blocking constraints. 
assert(!unbounded); vdata.distribution_ = z; alpha = 1.0; changed_active_set = false; } else { if (alpha > 1.0 && !unbounded) alpha = 1.0; // Interpolate between factor_posteriors_[i] and z. if (alpha == 1.0) { vdata.distribution_ = z; } else { for (int i = 0; i < vdata.active_set_.size(); ++i) { z[i] = (1 - alpha) * vdata.distribution_[i] + alpha * z[i]; vdata.distribution_[i] = z[i]; } } // Update inv(A). InvertAfterRemoval(vdata.inverse_A_, vdata.active_set_, blocking); // Remove blocking constraint from the active set. if (opts.verbose > 2) { cout << "Removed one element to the active set (iteration " << iter << ")." << endl; } //DeleteConfiguration(active_set_[blocking]); // Delete configutation. vdata.active_set_.erase(vdata.active_set_.begin() + blocking); z.erase(z.begin() + blocking); vdata.distribution_.erase(vdata.distribution_.begin() + blocking); changed_active_set = true; for (int i = 0; i < vdata.distribution_.size(); ++i) { assert(vdata.distribution_[i] > -1e-16); } } } } // Maximum number of iterations reached. // Return the best existing solution by computing the variable marginals // from the full distribution stored in z. //assert(false); ComputeMarginalsFromSparseDistribution(vertex, vdata.active_set_, z, variable_posteriors, additional_posteriors); } void InsertionSort(pair arr[], int length) { int i, j; pair tmp; for (i = 1; i < length; i++) { j = i; while (j > 0 && arr[j - 1].first > arr[j].first) { tmp = arr[j]; arr[j] = arr[j - 1]; arr[j - 1] = tmp; j--; } } } int project_onto_budget_constraint_cached(vec& x, int d, double budget, vector >& y) { int j, k, l, level; double s = 0.0; double tau = 0.0, tightsum; double left, right = -std::numeric_limits::infinity(); // Load x into a reordered y (the reordering is cached). 
if (y.size() != d) { y.resize(d); for (j = 0; j < d; j++) { s -= x[j]; y[j].first = -x[j]; y[j].second = j; } sort(y.begin(), y.end()); } else { for (j = 0; j < d; j++) { s -= x[j]; y[j].first = -x[y[j].second]; } // If reordering is cached, use a sorting algorithm // which is fast when the vector is almost sorted. InsertionSort(&y[0], d); } tightsum = s; s += budget; k = l = level = 0; bool found = false; double val_a, val_b; while (k < d && l < d) { if (level != 0) { tau = (s - tightsum) / static_cast(level); } if (k < d) val_a = y[k].first; val_b = 1.0 + y[l].first; left = right; if (k == d || val_b <= val_a) { right = val_b; } else { right = val_a; } if ((level == 0 && s == tightsum) || (level != 0 && tau <= right)) { // Found the right split-point! found = true; break; } if (k == d || val_b <= val_a) { tightsum += val_b; --level; ++l; } else { tightsum -= val_a; ++level; ++k; } } if (!found) { left = right; right = std::numeric_limits::infinity(); } for (j = 0; j < d; j++) { if (-x[j] >= right) { x[j] = 0.0; } else if (1.0 - x[j] <= left) { x[j] = 1.0; } else { x[j] += tau; } } return 0; } int project_onto_budget_constraint(vec& x, int d, double budget) { int j, k, l, level; double s = 0.0; double tau = 0.0, tightsum; double left, right = -std::numeric_limits::infinity(); vector y(d, 0.0); for (j = 0; j < d; j++) { s -= x[j]; y[j] = -x[j]; } sort(y.begin(), y.end()); tightsum = s; s += budget; k = l = level = 0; bool found = false; double val_a, val_b; while (k < d && l < d) { if (level != 0) { tau = (s - tightsum) / static_cast(level); } if (k < d) val_a = y[k]; val_b = 1.0 + y[l]; left = right; if (k == d || val_b <= val_a) { right = val_b; } else { right = val_a; } if ((level == 0 && s == tightsum) || (level != 0 && tau <= right)) { // Found the right split-point! 
found = true; break; } if (k == d || val_b <= val_a) { tightsum += val_b; --level; ++l; } else { tightsum -= val_a; ++level; ++k; } } if (!found) { left = right; right = std::numeric_limits::infinity(); } for (j = 0; j < d; j++) { if (-x[j] >= right) { x[j] = 0.0; } else if (1.0 - x[j] <= left) { x[j] = 1.0; } else { x[j] += tau; } } return 0; } // Solve the QP subproblem for budget factor. // TODO Enable caching void SolveQP_budget(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors){ vertex_data& vdata = vertex.data(); vec variable_log_potentials = total.neighbor_distribution + total.messages; for (int f = 0; f < variable_log_potentials.size(); ++f) { variable_posteriors[f] = variable_log_potentials[f]; if (variable_posteriors[f] < 0.0) { variable_posteriors[f] = 0.0; } else if (variable_posteriors[f] > 1.0) { variable_posteriors[f] = 1.0; } } double s = 0.0; for (int f = 0; f < vdata.nvars; ++f) { s += variable_posteriors[f]; } if (s > static_cast(vdata.budget)) { for (int f = 0; f < variable_log_potentials.size(); ++f) { variable_posteriors[f] = variable_log_potentials[f]; } project_onto_budget_constraint_cached(variable_posteriors, variable_log_potentials.size(), static_cast(vdata.budget), vdata.last_sort_); } } // Finds best configuration of budget factors void SolveMAP_budget(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors, double& value) { vertex_data& vdata = vertex.data(); // Create a local copy of the log potentials. 
vec log_potentials(total.messages); double valaux; value = 0.0; int num_active = 0; double sum = 0.0; for (int f = 0; f < vdata.nvars; ++f) { valaux = log_potentials[f]; if (valaux < 0.0) { variable_posteriors[f] = 0.0; } else { sum += valaux; variable_posteriors[f] = 1.0; } ++num_active; } if (num_active > vdata.budget) { vector > scores(vdata.nvars); for (int f = 0; f < vdata.nvars; ++f) { scores[f].first = -log_potentials[f]; scores[f].second = f; } sort(scores.begin(), scores.end()); num_active = 0; sum = 0.0; for (int k = 0; k < vdata.budget; ++k) { valaux = -scores[k].first; if (valaux < 0.0) break; int f = scores[k].second; variable_posteriors[f] = 1.0; sum += valaux; ++num_active; } for (int k = num_active; k < vdata.nvars; ++k) { int f = scores[k].second; variable_posteriors[f] = 0.0; } } value += sum; } // Finds best configuration for dense factors void SolveMAP_dense(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors, double& value){ vertex_data& vdata = vertex.data(); vec beliefs = vdata.potentials; int num_configurations = vdata.potentials.size(); for (int index_configuration = 0; index_configuration < num_configurations; ++index_configuration) { vector states(vdata.nvars, -1); get_configuration_states(vertex, index_configuration, &states); int offset = 0; for (int k = 0; k < vdata.nvars; ++k) { beliefs[index_configuration] += total.messages[offset + states[k]]; offset += vdata.cards[k];} } value = beliefs.maxCoeff(); } int project_onto_simplex_cached(vec& x, int d, double r, vector >& y) { int j; double s = 0.0; double tau; // Load x into a reordered y (the reordering is cached). if (y.size() != d) { y.resize(d); for (j = 0; j < d; j++) { s += x[j]; y[j].first = x[j]; y[j].second = j; } sort(y.begin(), y.end()); } else { for (j = 0; j < d; j++) { s += x[j]; y[j].first = x[y[j].second]; } // If reordering is cached, use a sorting algorithm // which is fast when the vector is almost sorted. 
InsertionSort(&y[0], d); } for (j = 0; j < d; j++) { tau = (s - r) / ((double) (d - j)); if (y[j].first > tau) break; s -= y[j].first; } for (j = 0; j < d; j++) { if (x[j] < tau) { x[j] = 0.0; } else { x[j] -= tau; } } return 0; } void SolveQP_xor(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors) { vertex_data& vdata = vertex.data(); //vec variable_posteriors = total.neighbor_distribution + total.messages; for (int f = 0; f < variable_posteriors.size(); ++f) { variable_posteriors[f] = total.neighbor_distribution[f] + total.messages[f]; //cout< log_potentials[first]) first = f; } value += log_potentials[first]; variable_posteriors.setZero(); variable_posteriors[first] = 1.0; }; // Finds beliefs using dense and budget factors void compute_beliefs(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors){ switch(vertex.data().factor_type){ case DENSE: SolveQP_dense(vertex,total, variable_posteriors, additional_posteriors); break; case BUDGET: SolveQP_budget(vertex,total, variable_posteriors, additional_posteriors); break; case XOR: SolveQP_xor(vertex,total, variable_posteriors, additional_posteriors); } }; // General solveMAP function void SolveMAP(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors, double& value){ switch(vertex.data().factor_type){ case XOR: SolveMAP_xor(vertex,total, variable_posteriors, additional_posteriors, value); break; case DENSE: SolveMAP_dense(vertex,total, variable_posteriors, additional_posteriors, value); break; case BUDGET: SolveMAP_budget(vertex,total, variable_posteriors, additional_posteriors, value); } }; }; /* end of ad3_vertex_program*/ //////////////////////////////////////////////////////////////////////////////// // This class implements the Bethe-ADMM as described in: // // Qiang Fu, Huahua Wang and Arindam Banerjee. 
// "Bethe-ADMM for Tree Decomposition based Parallel MAP Inference" // Conference on Uncertainty in Artificial Intelligence (UAI), 2013 // //////////////////////////////////////////////////////////////////////////////// struct bethe_admm_vertex_program:public admm_vertex_program { /* compute_grad_phi computes the gradient of bethe entropy for the factor */ void compute_grad_phi(vertex_type& vertex,vec& unary_beliefs, vec& factor_beliefs, vec& unary_grad,vec& factor_grad) { vertex_data& vdata = vertex.data(); // computation for variable beliefs for(int i=0; i< unary_beliefs.size(); i++){ unary_grad[i] *= EXP * (unary_beliefs[i]); } // computation for factor beliefs if(vdata.factor_type == DENSE){ vector states(vdata.nvars); for(int i=0; i< factor_beliefs.size(); i++){ factor_grad[i] *= factor_beliefs[i] / EXP; get_configuration_states(vertex, i, &states); int offset =0; for(int j=0; j< vdata.nvars; j++){ factor_grad[i] /= unary_beliefs[offset + states[j]]; offset += vdata.cards[j]; } } } } /* run_bp computes marginal beliefs using sum-product belief propagation */ void run_bp(vertex_type& vertex, vec& unary_pots, vec& factor_pots, vec& unary_margs, vec& factor_margs) { vertex_data& vdata = vertex.data(); unary_margs.resize(unary_pots.size()); factor_margs = factor_pots; vector states(vdata.nvars, -1); // computing messages for(int i=0; i < vdata.nvars; i++) { vec messages = factor_pots; for(int j=0; j < factor_pots.size(); j++) { get_configuration_states(vertex, j, &states); int offset = 0; for(int k = 0; k < vdata.nvars; k++) { if( k != i) messages[j] *= unary_pots[offset + states[k]]; offset += vdata.cards[k]; } } vector marg_messages(vdata.cards[i], 0); for(int j=0; j < factor_pots.size(); j++) { get_configuration_states(vertex, j, &states); marg_messages[states[i]] += messages[j]; } int var_offset =0; for(int j=0; j < i; j++) { var_offset += vdata.cards[j]; } double sum = 0; // computing marginal beliefs for variables for(int j=0; j < marg_messages.size(); j++) 
{ unary_margs[var_offset + j] = marg_messages[j] * unary_pots[var_offset + j]; sum += unary_margs[var_offset + j]; } for(int j=0; j < marg_messages.size(); j++) { unary_margs[var_offset + j] /= sum ; } } // compuitng factor beliefs double fact_sum = 0; for(int i=0; i < factor_pots.size(); i++) { get_configuration_states(vertex, i, &states); int offset = 0; for(int j =0; j < vdata.nvars; j++) { factor_margs[i] *= unary_pots[offset + states[j]]; offset += vdata.cards[j]; } fact_sum += factor_margs[i]; } for(int i=0; i < factor_pots.size(); i++) { factor_margs[i] /= fact_sum; } } /* adjust_beliefs prevents overflow/ underflow of belief variable */ void adjust_beliefs(vertex_type& vertex){ vertex_data& vdata = vertex.data(); for(int i=0; i< vdata.beliefs.size(); i++){ if(vdata.beliefs[i] < 10e-100) vdata.beliefs[i] = 10e-100; } if(vdata.factor_type == DENSE){ for(int i=0; i< vdata.factor_beliefs.size(); i++){ if(vdata.factor_beliefs[i] < 10e-100) vdata.factor_beliefs[i] = 10e-100; } } } /* exponentiates potentials for bp. 
TODO: use faster approximation of pow */ void exponentiate(vec& potential_vector){ for(int i=0; i< potential_vector.size(); i++){ potential_vector[i] = pow(EXP, potential_vector[i]); } } void softmax(vertex_type& vertex, vec& unary_pots, vec& unary_margs){ unary_margs = unary_pots/unary_pots.sum();} /* solves QP for factor vertices using bp */ void compute_beliefs(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors){ vertex_data& vdata = vertex.data(); vec unary_eta, factor_eta; vec unary_grad, factor_grad; // computing eta unary_eta = total.messages + opts.step_size * (total.neighbor_distribution - vdata.beliefs); unary_eta = (unary_eta)/opts.alpha; exponentiate(unary_eta); unary_grad.resize(unary_eta.size()); if(vdata.factor_type == DENSE){ factor_eta = (vdata.potentials)/opts.alpha; exponentiate(factor_eta); factor_grad.resize(factor_eta.size()); } compute_grad_phi(vertex, vdata.beliefs, vdata.factor_beliefs, unary_eta, factor_eta); //running bp on eta switch(vdata.factor_type){ case DENSE: run_bp(vertex, unary_eta, factor_eta, vdata.beliefs, vdata.factor_beliefs); break; case XOR: softmax(vertex, unary_eta, vdata.beliefs); } //adjust beliefs for overflow/underflow adjust_beliefs(vertex); }; /* solves MAP for factor vertices */ void SolveMAP_dense(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors, double& value){ vertex_data& vdata = vertex.data(); vec beliefs = vdata.potentials; vector states(vdata.nvars); for(int i=0; i< vdata.potentials.size(); i++) { get_configuration_states(vertex, i, &states); int offset = 0; for(int j=0; j< vdata.nvars; j++){ beliefs[i] += total.messages[offset + states[j]]; offset += vdata.cards[j]; } } value = beliefs.maxCoeff(); }; void SolveMAP_xor(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors, double& value) { vertex_data& vdata = vertex.data(); vec log_potentials = total.messages; int 
first = -1; value = 0.0; for (int f = 0; f < log_potentials.size(); ++f) { if (first < 0 || log_potentials[f] > log_potentials[first]) first = f; } value += log_potentials[first]; variable_posteriors.setZero(); variable_posteriors[first] = 1.0; }; void SolveMAP(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors, double& value){ switch(vertex.data().factor_type){ case XOR: SolveMAP_xor(vertex,total, variable_posteriors, additional_posteriors, value); break; case DENSE: SolveMAP_dense(vertex,total, variable_posteriors, additional_posteriors, value); } }; }; /* end of bethe_admm_vertex_program */ #endif ================================================ FILE: toolkits/graphical_models/dd_grlab.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * * \brief This application performs MAP inference on Markov Nets * provided in standard UAI file format via Dual-Decomposition. 
* * * \authors Dhruv Batra, André Martins, Aroma Mahendru */ #ifndef __DD_GRLAB_HPP__ #define __DD_GRLAB_HPP__ #include #include #include #include #include #include #include "eigen_serialization.hpp" #include "dd_opts.hpp" #include "utils.hpp" #include #include using namespace std; /** * \brief Eigen library vectors are used to store the potentials (log-space) */ typedef Eigen::VectorXd factor_type; typedef Eigen::VectorXd vec; typedef Eigen::MatrixXd mat; struct dd_global_vars { double old_dual ; // stores the value of dual objective for the previous iteration double primal_best ; // stores the value of best primal objective found so far. bool converged ; // true if dual objective value has converged to required tolerance level, otherwise false int dual_inc_count ; // keeps track of the number of times the value of dual objective increased vector < vector > history ; // stores dual and primal objective values int sq_norm_g ; // stores the value of the square of the norm of the subgradient vector int iter_at_aggregate ; // iteration number at the time of aggregate graphlab::timer timer ; // time object. Helps in finding the time elapsed. dd_global_vars(): old_dual(200), primal_best(-1e10), converged(false), dual_inc_count(1), history(4,vector()), sq_norm_g(100), iter_at_aggregate(0){}; } global_vars; /* end of struct dd_global_vars */ ///////////////////////////////////////////////////////////////////////// // Edge and Vertex data and Graph Type /** * \brief There is a vertex for each factor in the graph AND each singleton */ struct vertex_data { int nvars; // Number of variables in this factor. int factor_type; //type of factor : dense(0), budget(1) int degree; // Degree of this factor (same as nvars for higher-order factors). vector cards; // Cardinality of each variable. vector neighbors; // Vertex ids of the neighbors. vec potentials; // Potentials for each configuration of the factor. 
int budget; // Only for Budget factors vector bound_states; // Only for Budget Factors vector unary_degree; // Only for unary vertices int best_configuration; // Index of the best configuration at a subgradient step. // TODO: Maybe replace best_configuration by beliefs for the high order variables? // In which case, beliefs would be vector beliefs. double dual_contrib; // Contribution of this factor to the dual. We can compute this during the applies. double primal_contrib; // Contribution of this factor to the primal. We can compute this during the applies. // NOTENOTE: ONLY true for sync engine. For async, we need to write an aggregator function. double dual_res_contrib; // Contribution of this factor to the dual residual. (Used only for ADMM) double primal_res_contrib; // Contribution of this factor to the primal residual. (Used only for ADMM) double primal_rel_contrib; // Contribution of this factor to the relaxed primal. (Used only for ADMM) vec beliefs; // Posterior values for the configurations after averaging (projected DD, unary variables only). vec factor_beliefs; // Posterior value for factor variables int apply_count; // No. 
of times apply has been called on this vertex int sum_sq_norm_g; // Sum of square of norm of subgradient for each vertex (used only for factor vertices) bool schedule_vertex; // Decides if vertex is to be scheduled for further iterations or not vector distribution_; vector active_set_; vector inverse_A_; vector > last_sort_; vertex_data(): nvars(0), factor_type(0), degree(0), budget(0), best_configuration(0), dual_contrib(0), primal_contrib(0), dual_res_contrib(0), primal_res_contrib(0), primal_rel_contrib(0), sum_sq_norm_g(0), apply_count(0), schedule_vertex(true) {} void load(graphlab::iarchive& arc) { arc >> nvars >> degree >> cards >> neighbors >> potentials >> dual_contrib >> primal_contrib >> best_configuration >> beliefs >> apply_count>>factor_beliefs >>sum_sq_norm_g>>primal_res_contrib >>primal_rel_contrib>>dual_res_contrib >>schedule_vertex>>factor_type >>budget>>bound_states >>unary_degree>>active_set_ >>inverse_A_>>last_sort_ ; } void save(graphlab::oarchive& arc) const { arc << nvars << degree << cards << neighbors << potentials << dual_contrib << primal_contrib << best_configuration << beliefs << apply_count <> potentials >> multiplier_messages >> local_messages; } void save(graphlab::oarchive& arc) const { arc << potentials << multiplier_messages << local_messages; } }; //end of edge_data /** * \brief gather_type is a structure that will be used as the return type of gather function. It includes * messages (used both for unary and factor vertices), neighbor_best_conf, neighbor_distribution (used only for * factor vertices) and sq_norm_g (for storing square of norm of subgradient) for each edge. 
*/ struct gather_type { factor_type messages; factor_type multipliers; vector neighbor_conf; vec neighbor_distribution; int sq_norm_g; gather_type():sq_norm_g(0){}; gather_type(factor_type f, vector nc = vector (), int sg = 0, vec nd = vec() ): messages(f), neighbor_conf(nc), neighbor_distribution(nd), sq_norm_g(sg){}; void load(graphlab::iarchive& arc) { arc >>messages>>neighbor_conf >>sq_norm_g>>neighbor_distribution >>multipliers; } void save(graphlab::oarchive& arc) const { arc <>dual>>primal>>sum_sq_norm_g >>primal_rel>>total_confs >>dual_res>>primal_res; } void save(graphlab::oarchive& arc) const { arc < graph_type; /** * \brief The Dual Decomposition Vertex Program. */ struct dd_vertex_program : public graphlab::ivertex_program< graph_type, gather_type, graphlab::messages::sum_priority >, public graphlab::IS_POD_TYPE { ///////////////////////////////////////////////////////////////////////// // Find the configuration index of a factor given the array of states. ///////////////////////////////////////////////////////////////////////// int get_configuration_index(const graph_type::vertex_type& vertex, const std::vector& states) const { const vertex_data& vdata = vertex.data(); int index = states[0]; for (size_t i = 1; i < states.size(); ++i) { index *= vdata.cards[i]; index += states[i]; } return index; } ///////////////////////////////////////////////////////////////////////// // Find the array of states corresponding to a factor configuration index. 
///////////////////////////////////////////////////////////////////////// void get_configuration_states(const graph_type::vertex_type& vertex, int index, std::vector* states) const { const vertex_data& vdata = vertex.data(); int tmp = 1; for (size_t i = 1; i < states->size(); ++i) tmp *= vdata.cards[i]; (*states)[0] = index / tmp; for (size_t i = 1; i < states->size(); ++i) { index = index % tmp; tmp /= vdata.cards[i]; (*states)[i] = index / tmp; } } /////////////////////////////////////////////////////////// // Updates stepsize according to different stepsize rules. /////////////////////////////////////////////////////////// double update_stepsize(icontext_type& context, int type, double old_dual, double primal_best,int norm_g_sq, int dual_inc_count,int iter_since_aggregate) const { switch (type) { case 0: return opts.step_size; break; case 1: return(opts.step_size/(context.iteration()+2)); break; case 2: return(2* opts.step_size *(old_dual-primal_best)/((norm_g_sq+1) * (iter_since_aggregate + dual_inc_count + 1))); break; case 3: return(opts.step_size/dual_inc_count + 1); break; case 4: if(context.iteration()+2 < 300) return( opts.step_size/(context.iteration()+2)); else return(opts.step_size/(300)); } } /** * \brief Given an edge and a vertex return the other vertex along * that edge. */ inline vertex_type get_other_vertex(edge_type& edge, const vertex_type& vertex) const { return vertex.id() == edge.source().id()? 
edge.target() : edge.source(); }; // end of other_vertex virtual edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const = 0; virtual gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const = 0; virtual void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) = 0; virtual edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const = 0; virtual void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const = 0; }; // end of class bp_vertex_program //////////////////////////////////////////////////////////////////////////////// // This class implements the "symmetric" version of dual decomposition described // in: // D. Sontag, A. Globerson, T. Jaakkola. // Introduction to Dual Decomposition for Inference. // Optimization for Machine Learning, editors S. Sra, S. Nowozin, and S. J. // Wright: MIT Press, 2011 //////////////////////////////////////////////////////////////////////////////// struct dd_vertex_program_symmetric : public dd_vertex_program { /** * \brief Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { if(!opts.debug){ return graphlab::ALL_EDGES; } else return graphlab::NO_EDGES; }; // end of gather_edges /** * \brief The gather function takes a vertex and an edge as inputs and outputs a vector of numeric values. Vectors of numeric values will later be summed over all edges incident in this vertex. So, if the vertex is a unary factor, we can just return the vector of Lagrange multipliers stored in "edge.messages". Otherwise (if vertex is a general factor), things are a little more tricky. Suppose the factor is linked to K variables, with cardinalities C_1, ..., C_K. Suppose this edge is with respect to the k-th variable. Then, we return a vector of size C_1 + ... 
+ C_K which is zero everywhere except in the k-th slot, where the Lagrange multipliers in "edge.messages" will be copied to. This way, when the "gather sum" takes place, and since all these slots are disjoint, we will just get the Lagrange multipliers of all the variables. It also gathers the best_configuration of neighbors for the factors and norm of subgradient value for each edge. */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { if (opts.verbose > 1) cout << "gather begin" << endl; const vertex_type other_vertex = get_other_vertex(edge, vertex); const vertex_data& vdata = vertex.data(); edge_data& edata = edge.data(); if (vdata.nvars == 1) { // Unary factor. if (opts.verbose > 1) cout << "This unary factor has " << vertex.num_in_edges() << " in edges and " << vertex.num_out_edges() << " out edges" << endl; if (opts.verbose>1) { cout << "Gather on (" << vertex.id() << "," << other_vertex.id() << ") called from " << vertex.id() << "\n"; cout << "vdata.neighbours = " << vdata.neighbors << "\n"; cout << "Message: " << edata.multiplier_messages << "\n---\n"; } gather_type gather_data(edata.multiplier_messages); return gather_data; } else { // General factor. 
factor_type messages; messages.setZero(vdata.potentials.size()); int offset = 0; int index_neighbor = -1; for (int k = 0; k < vdata.nvars; ++k) { int vertex_id = vdata.neighbors[k]; if (vertex_id == other_vertex.id()) { index_neighbor = k; break; } offset += vdata.cards[k]; } CHECK_GE(index_neighbor, 0); vector neighbor_conf(vdata.nvars, 0); neighbor_conf[index_neighbor] = other_vertex.data().best_configuration; for (int state = 0; state < vdata.cards[index_neighbor]; ++state) { messages[offset + state] = -edata.multiplier_messages[state]; } if (opts.verbose>1) { cout << "Gather on (" << vertex.id() << "," << other_vertex.id() << ") called from " << vertex.id() << "\n"; cout << "vdata.neighbours = " << vdata.neighbors << "\n"; cout << "estimated offset = " << offset << "\n"; cout << "Message: " << messages << "\n---\n"; } vector states(vdata.nvars, -1); get_configuration_states(vertex, vdata.best_configuration, &states); int sq_norm_g = (states[index_neighbor] == other_vertex.data().best_configuration)?0:2; gather_type gather_data(messages, neighbor_conf); gather_data.sq_norm_g = sq_norm_g; return gather_data; } if (opts.verbose > 2) cout << "gather end" << endl; }; // end of gather function /** * \brief The apply function takes a vertex and a vector of numeric values (a total) as input. For unary vertices, this will be the sum of Lagrange multipliers, and we just need to sum that to the vertex potential and compute the argmax. For general factors, the vector of numeric values, as stated above, will contain all the Lagrange multipliers of the neighboring variables. So we need to loop through all possible factor configurations, get the sequence of states of each configuration, fetch the Lagrange multipliers for those states, and add them to the factor potential. Then we compute the argmax. It also computes dual and primal contribution for finding dual and primal objective values. 
*/
  // Combine the gathered sum with the local potential, recompute this
  // vertex's argmax configuration, and record dual/primal contributions
  // for the objective aggregator.
  void apply(icontext_type& context, vertex_type& vertex,
             const gather_type& total) {
    if (!opts.debug){
      vertex_data& vdata = vertex.data();
      if (vdata.nvars == 1) {
        // Unary factor.
        ASSERT_EQ(vdata.potentials.size(), total.messages.size());
        // Belief = unary potential + sum of incoming Lagrange multipliers.
        vec belief = vdata.potentials + total.messages;
        // Find primal contrib -- note this uses the configuration chosen in
        // the previous round, before it is overwritten just below.
        vdata.primal_contrib = vdata.potentials[vdata.best_configuration];
        // Save the best configuration for this vertex and find dual contrib
        // (maxCoeff writes the argmax into best_configuration and returns
        // the maximum value).
        vdata.dual_contrib = belief.maxCoeff(&vdata.best_configuration);
        if (opts.verbose > 1) {
          cout << "Vertex: " << vertex.id() << "\n";
          cout << "Potential: " << vdata.potentials << "\n";
          cout << "incomming message: " << total.messages << "\n";
          cout << "belief: " << belief << "\n";
          cout << "dual contrib: " << vdata.dual_contrib << "\n";
          cout << "vdata.best_configuration = "
               << vdata.best_configuration << "\n---\n";
        }
      } else {
        // General factor: score every joint configuration.
        vec belief = vdata.potentials;
        int num_configurations = vdata.potentials.size();
        for (int index_configuration = 0;
             index_configuration < num_configurations;
             ++index_configuration) {
          // NOTE(review): element type was lost in the pasted source;
          // presumably vector<int> -- kept verbatim.
          vector states(vdata.nvars, -1);
          // This could be made more efficient by defining an iterator over
          // factor configurations.
          get_configuration_states(vertex, index_configuration, &states);
          int offset = 0;
          // Add each neighbor's multiplier for its state in this
          // configuration; slots are laid out consecutively per variable.
          for (int k = 0; k < vdata.nvars; ++k) {
            belief[index_configuration] += total.messages[offset + states[k]];
            offset += vdata.cards[k];
          }
        }
        // Save the best configuration for this factor and find dual contrib
        vdata.dual_contrib = belief.maxCoeff(&vdata.best_configuration);
        // Find primal contrib: potential of the neighbors' joint choice.
        int conf_index = get_configuration_index(vertex, total.neighbor_conf);
        vdata.primal_contrib = vdata.potentials[conf_index];
        // Find contribution for sum of square of gradient (accumulated
        // disagreement count gathered from the edges).
        vdata.sum_sq_norm_g = total.sq_norm_g;
        if (opts.verbose > 1) {
          cout << "Vertex: " << vertex.id() << "\n";
          cout << "Potential: " << vdata.potentials << "\n";
          cout << "incomming message: " << total.messages << "\n";
          cout << "belief: " << belief << "\n";
          cout << "dual contrib: " << vdata.dual_contrib << "\n";
          cout << "vdata.best_configuration = "
               << vdata.best_configuration << "\n---\n";
        }
      }
    }
    else usleep(1);  // debug mode: do no work, just stay schedulable
    if (opts.verbose > 1) cout << "end apply" << endl;
  }; // end of apply

  /**
   * \brief Since the MRF is undirected we will use all edges for gather and
   * scatter
   */
  edge_dir_type scatter_edges(icontext_type& context,
                              const vertex_type& vertex) const {
    return graphlab::ALL_EDGES; // NOTENOTE: This assumes a sync engine.
  }; // end of scatter edges

  /**
   * \brief The scatter function takes a vertex and an edge as input. We just
      need to update the messages (Lagrange multipliers) by looking at the
      saved argmaxes.
   */
  void scatter(icontext_type& context, const vertex_type& vertex,
               edge_type& edge) const {
    const vertex_type other_vertex = get_other_vertex(edge, vertex);
    // Normalize the edge so we always know which endpoint is the unary
    // variable and which is the general factor.
    const vertex_type *unary_vertex;
    const vertex_type *factor_vertex;
    if (!opts.debug){
      if (vertex.data().nvars == 1) {
        // Unary factor.
        unary_vertex = &vertex;
        factor_vertex = &other_vertex;
      } else {
        // General factor.
        unary_vertex = &other_vertex;
        factor_vertex = &vertex;
      }
      const vertex_data& vdata = unary_vertex->data();
      const vertex_data& other_vdata = factor_vertex->data();
      edge_data& edata = edge.data();
      if (opts.verbose > 1) cout << "begin scatter" << endl;
      // The +2 offset matches the iteration offset inside update_stepsize.
      int iter_since_aggregate =
          (context.iteration()+2) - global_vars.iter_at_aggregate ;
      // NOTE(review): the stepsize rule is hard-coded to type 1 (1/t decay)
      // here rather than taken from the command-line options -- confirm.
      double stepsize = update_stepsize(context, 1, global_vars.old_dual,
                                        global_vars.primal_best,
                                        global_vars.sq_norm_g,
                                        global_vars.dual_inc_count,
                                        iter_since_aggregate);
      CHECK_GE(vdata.best_configuration, 0);
      CHECK_LT(vdata.best_configuration, vdata.cards[0]);
      // Negative subgradient: push the multiplier of the unary's argmax
      // down...
      edata.multiplier_messages[vdata.best_configuration] -= stepsize;
      // NOTE(review): element type was lost in the pasted source;
      // presumably vector<int> -- kept verbatim.
      vector states(other_vdata.nvars, -1);
      get_configuration_states(*factor_vertex,
                               other_vdata.best_configuration, &states);
      int offset = 0;
      int index_neighbor = -1;
      // Locate this unary variable's slot within the factor's variable list
      // (offset is accumulated but not used below).
      for (int k = 0; k < other_vdata.nvars; ++k) {
        int vertex_id = other_vdata.neighbors[k];
        if (vertex_id == unary_vertex->id()) {
          index_neighbor = k;
          break;
        }
        offset += other_vdata.cards[k];
      }
      CHECK_GE(index_neighbor, 0);
      CHECK_GE(states[index_neighbor], 0);
      CHECK_LT(states[index_neighbor], other_vdata.cards[index_neighbor]);
      CHECK_EQ(other_vdata.cards[index_neighbor], vdata.cards[0]);
      // Negative subgradient: ...and the factor's argmax state up; the two
      // updates cancel when unary and factor agree.
      edata.multiplier_messages[states[index_neighbor]] += stepsize;
      //if (opts.verbose > 1)
      if (opts.verbose>1) {
        cout << "Scatter on (" << unary_vertex->id() << ","
             << factor_vertex->id() << ") called from "
             << vertex.id() << "\n";
        cout << "unary best config = " << vdata.best_configuration << "\n"
             << "factor best config = " << states[index_neighbor] << "\n";
        cout << "Message: " << edata.multiplier_messages << "\n---\n";
      }
    }
    if (opts.verbose > 1) cout << "end scatter" << endl;
    // Signalling the other vertex and yourself to start.
if ((context.iteration()+1) < opts.maxiter && global_vars.converged == false) {
      // Sync-engine self-scheduling: keep both endpoints of this edge
      // active until the iteration cap is reached or the aggregator has
      // flagged convergence.
      context.signal(vertex);
      context.signal(other_vertex);
    }
  }; // end of scatter
}; // end of class dd_vertex_program_symmetric

////////////////////////////////////////////////////////////////////////////////
// This class implements the "projected" version of dual decomposition described
// in:
// Komodakis, N., Paragios, N., and Tziritas, G. (2007).
// "MRF optimization via dual decomposition: Message-passing revisited"
// In Proc. of International Conference on Computer Vision.
//
// The formulation used is the one in Algorithm 1 of:
//
// André F. T. Martins, Mário A. T. Figueiredo, Pedro M. Q. Aguiar,
// Noah A. Smith, and Eric P. Xing.
// "An Augmented Lagrangian Approach to Constrained MAP Inference."
// International Conference on Machine Learning (ICML), 2011.
////////////////////////////////////////////////////////////////////////////////
struct dd_vertex_program_projected : public dd_vertex_program {
  // Per-vertex wall-clock timer (not read anywhere in this chunk).
  graphlab::timer vertex_timer;

  /**
   * \brief Since the MRF is undirected we will use all edges for gather and
   * scatter
   */
  edge_dir_type gather_edges(icontext_type& context,
                             const vertex_type& vertex) const {
    // Debug mode skips gathering entirely.
    if(!opts.debug){
      return graphlab::ALL_EDGES;
    }
    else return graphlab::NO_EDGES;
  }; // end of gather_edges

  /**
   * \brief The gather function takes a vertex and an edge as inputs and
      outputs a vector of numeric values. Vectors of numeric values will
      later be summed over all edges incident in this vertex. If the vertex
      is a unary factor, compute the sum of all the local MAP variables,
      which in the "apply" function will serve to compute the global MAP.
      Otherwise (if vertex is a general factor), things are a little more
      tricky. Suppose the factor is linked to K variables, with
      cardinalities C_1, ..., C_K. Suppose this edge is with respect to the
      k-th variable. Then, we return a vector of size C_1 + ...
+ C_K which is zero everywhere except in the k-th slot, where the Lagrange multipliers in "edge.messages" will be copied to. This way, when the "gather sum" takes place, and since all these slots are disjoint, we will just get the Lagrange multipliers of all the variables. */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_type other_vertex = get_other_vertex(edge, vertex); const vertex_data& vdata = vertex.data(); edge_data& edata = edge.data(); if (vdata.factor_type == VAR ) { // Unary factor. if (opts.verbose > 1) { cout << "This unary factor has " << vertex.num_in_edges() << " in edges and " << vertex.num_out_edges() << " out edges" << endl; } gather_type gatherdata(edata.local_messages); return gatherdata; } else { // General factor. factor_type messages; //messages.resize(vdata.potentials.size()); messages.setZero(vdata.potentials.size()); int offset = 0; int index_neighbor = -1; for (int k = 0; k < vdata.nvars; ++k) { int vertex_id = vdata.neighbors[k]; if (vertex_id == other_vertex.id()) { index_neighbor = k; break; } offset += vdata.cards[k]; } CHECK_GE(index_neighbor, 0); vector neighbor_conf(vdata.nvars, 0); neighbor_conf[index_neighbor] = other_vertex.data().best_configuration; for (int state = 0; state < vdata.cards[index_neighbor]; ++state) { messages[offset + state] = edata.multiplier_messages[state] + edata.potentials[state]; } gather_type gather_data(messages,neighbor_conf); return gather_data; } }; // end of gather function /** * \brief The apply function takes a vertex and a vector of numeric values (a total) as input. For a unary vertex, "total" will be the sum of local MAP vectors, and we just need to divide by the vertex degree and save the result as global MAP. For higher-order factors, "total" will contain all the Lagrange multipliers of the neighboring variables. 
So we need to loop through all possible factor configurations, get the sequence of states of each configuration, fetch the Lagrange multipliers for those states, and add them to the factor potential. Then we compute the argmax and save result to local MAP for each variable connected to the factor. Note that since global MAP is computed from local MAP , locla MAP is needed to be computed before global MAP. Hence in even iterations local MAP is computed ( in scatter step) and in the subsequent iteration (which is hence odd) global MAP (in apply step) and multiplier messages (in scatter) are updated. */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { vertex_data& vdata = vertex.data(); if (!opts.debug){ if (vdata.factor_type == VAR ) { if (context.iteration()%2 != 0) { vdata.beliefs = total.messages / static_cast(vdata.degree); vdata.beliefs.maxCoeff(&vdata.best_configuration); //Find primal contrib vdata.primal_contrib = vdata.potentials[vdata.best_configuration]; } } else { if(context.iteration()%2 == 0){ // General factor. vec beliefs = vdata.potentials; int num_configurations = vdata.potentials.size(); for (int index_configuration = 0; index_configuration < num_configurations; ++index_configuration) { vector states(vdata.nvars, -1); // This could be made more efficient by defining an iterator over factor // configurations. 
get_configuration_states(vertex, index_configuration, &states); int offset = 0; for (int k = 0; k < vdata.nvars; ++k) { beliefs[index_configuration] += total.messages[offset + states[k]]; offset += vdata.cards[k]; } } // Save the best configuration for this factor and find dual contrib vdata.dual_contrib = beliefs.maxCoeff(&vdata.best_configuration); //Find primal contrib int conf_index = get_configuration_index(vertex, total.neighbor_conf); vdata.primal_contrib = vdata.potentials[conf_index]; } } } else usleep(1); }; // end of apply /** * \brief Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of scatter edges /** * \brief The scatter function takes a vertex and an edge as input. (1) If the vertex is a unary factor, we update the messages (Lagrange multipliers) by subtracting the global MAP by the local MAP. (2) If the vertex is a higher order factor, this function will take the best configuration (obtained at the apply function) and save the local MAP at the corresponding edge. */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_data& vdata = vertex.data(); edge_data& edata = edge.data(); const vertex_type other_vertex = get_other_vertex(edge, vertex); if(!opts.debug){ if (vdata.factor_type == VAR ) { if (context.iteration()%2 != 0) { // Unary factor. Update the messages (Lagrange multipliers). double stepsize = update_stepsize(context, 1, global_vars.old_dual, global_vars.primal_best, global_vars.sq_norm_g, global_vars.dual_inc_count, 0); edata.multiplier_messages += (vdata.beliefs - edata.local_messages) * stepsize; } } else { if (context.iteration()%2 == 0) { //General factor. Update the local MAPs. 
const vertex_type &unary_vertex = get_other_vertex(edge, vertex); vector states(vdata.nvars, -1); get_configuration_states(vertex, vdata.best_configuration, &states); int offset = 0; int index_neighbor = -1; for (int k = 0; k < vdata.nvars; ++k) { int vertex_id = vdata.neighbors[k]; if (vertex_id == unary_vertex.id()) { index_neighbor = k; break; } offset += vdata.cards[k]; } CHECK_GE(index_neighbor, 0); CHECK_GE(states[index_neighbor], 0); CHECK_LT(states[index_neighbor], vdata.cards[index_neighbor]); //CHECK_EQ(vdata.cards[index_neighbor], unary_vertex.data().cards[0]); edata.local_messages.setZero(); edata.local_messages[states[index_neighbor]] += 1.0; } } } if ((context.iteration()+1) < opts.maxiter && global_vars.converged == false) { context.signal(vertex); context.signal(other_vertex); } }; // end of scatter }; // end of class dd_vertex_program_projected /////////////////////////////////////////////////////////////////////////////////// // This class implements the general Alternating Directions Method of Multipliers. // // The formulation used is the one in Algorithm 2 of: // // André F. T. Martins, Mário A. T. Figueiredo, Pedro M. Q. Aguiar, // Noah A. Smith, and Eric P. Xing. // "Alternating Directions Dual Decomposition" // Arxiv preprint arXiv:1212.6550, 2012. 
/////////////////////////////////////////////////////////////////////////////////// struct admm_vertex_program:public dd_vertex_program { typedef int Configuration; // Function to solve each quadratic programming sub problem virtual void compute_beliefs(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors) = 0; virtual void SolveMAP(vertex_type& vertex,const gather_type& total, vec& variable_posteriors, vec& additional_posteriors, double& value) = 0; /** * \brief Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { if(!opts.debug){ return graphlab::ALL_EDGES; } else return graphlab::NO_EDGES; }; // end of gather_edges /** * \brief The gather function takes a vertex and an edge as inputs and outputs a vector of numeric values. Vectors of numeric values will later be summed over all edges incident in this vertex. If the vertex is a unary factor, compute the sum of all the local MAP variables, which in the "apply" function will serve to compute the global MAP. Otherwise (if vertex is a general factor), things are a little more tricky. Suppose the factor is linked to K variables, with cardinalities C_1, ..., C_K. Suppose this edge is with respect to the k-th variable. Then, we return a vector of size C_1 + ... + C_K which is zero everywhere except in the k-th slot, where the Lagrange multipliers in "edge.messages" will be copied to. This way, when the "gather sum" takes place, and since all these slots are disjoint, we will just get the Lagrange multipliers of all the variables. */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_type other_vertex = get_other_vertex(edge, vertex); const vertex_data& vdata = vertex.data(); edge_data& edata = edge.data(); if (vdata.factor_type == VAR ) { // Unary factor. 
if (opts.verbose > 1){ cout << "This unary factor has " << vertex.num_in_edges() << " in edges and " << vertex.num_out_edges() << " out edges" << endl; } gather_type gatherdata(edata.local_messages); return gatherdata; } else if(vdata.factor_type != BUDGET){ // Dense factor. factor_type messages, neighbor_distribution, multipliers; messages.setZero(vdata.potentials.size()); neighbor_distribution.setZero(vdata.potentials.size()); multipliers.setZero(vdata.potentials.size()); int offset = 0; int index_neighbor = -1; for (int k = 0; k < vdata.nvars; ++k) { int vertex_id = vdata.neighbors[k]; if (vertex_id == other_vertex.id()) { index_neighbor = k; break;} offset += vdata.cards[k]; } CHECK_GE(index_neighbor, 0); vector neighbor_conf(vdata.nvars, 0); neighbor_conf[index_neighbor] = other_vertex.data().best_configuration; for (int state = 0; state < vdata.cards[index_neighbor]; ++state) { messages[offset + state] = edata.multiplier_messages[state] + edata.potentials[state]; multipliers[offset + state] = edata.multiplier_messages[state]; neighbor_distribution[offset + state] = other_vertex.data().beliefs[state]; } gather_type gather_data(messages,neighbor_conf); gather_data.neighbor_distribution = neighbor_distribution; gather_data.multipliers = multipliers; return gather_data; } else if(vdata.factor_type == BUDGET) { //Budget factor factor_type messages, neighbor_distribution; messages.setZero(vdata.nvars); neighbor_distribution.setZero(vdata.nvars); int index_neighbor = -1; for (int k = 0; k < vdata.nvars; ++k) { int vertex_id = vdata.neighbors[k]; if (vertex_id == other_vertex.id()) { index_neighbor = k; break;} } messages[index_neighbor] = edata.multiplier_messages[0] + edata.potentials[vdata.bound_states[index_neighbor]]; neighbor_distribution[index_neighbor] = other_vertex.data().beliefs[vdata.bound_states[index_neighbor]]; gather_type gather_data(messages); gather_data.neighbor_distribution = neighbor_distribution; return gather_data; } }; // end of gather 
function /** * \brief The apply function takes a vertex and a vector of numeric values (a total) as input. For a unary vertex, "total" will be the sum of local MAP vectors, and we just need to divide by the vertex degree and save the result as global MAP. For higher-order factors, "total" will contain all the Lagrange multipliers of the neighboring variables. So we need to loop through all possible factor configurations, get the sequence of states of each configuration, fetch the Lagrange multipliers for those states, and add them to the factor potential. Then we compute solution of Quadratic subproblem and save result to local MAP for each variable connected to the factor. Note that since global MAP is computed from local MAP , locla MAP is needed to be computed before global MAP. Hence in even iterations local MAP is computed ( in scatter step) and in the subsequent iteration (which is hence odd) global MAP (in apply step) and multiplier messages (in scatter) are updated. */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { vertex_data& vdata = vertex.data(); if (!opts.debug){ if (vdata.factor_type == VAR ) { if (context.iteration()%2 != 0) { // Unary factor. 
//Find dual residual contrib vec dual_res_contrib; dual_res_contrib.setZero(vdata.cards[0]); for(int i=0; i(vdata.unary_degree[i])) - vdata.beliefs[i] ; dual_res_contrib[i] = dual_res_contrib[i] * dual_res_contrib[i]; // update global MAP vdata.beliefs[i] = total.messages[i] / static_cast(vdata.unary_degree[i]); } vdata.dual_res_contrib = dual_res_contrib.sum(); // Find best configuration vdata.beliefs.maxCoeff(&vdata.best_configuration); //Find relaxed primal contribution vdata.primal_rel_contrib = vdata.potentials.dot(vdata.beliefs); // Find primal contribution vdata.primal_contrib = vdata.potentials[vdata.best_configuration]; } } else{ if(context.iteration()%2 == 0){ // Dense and Budget factors vec additional_posteriors, variable_posteriors; additional_posteriors.setZero(vdata.potentials.size()); variable_posteriors.setZero(vdata.potentials.size()); if(vdata.factor_type == BUDGET){ additional_posteriors.setZero(vdata.nvars); variable_posteriors.setZero(vdata.nvars); } //Find dual contrib SolveMAP(vertex, total, variable_posteriors, additional_posteriors, vdata.dual_contrib); // Find relaxed primal contribution if(vdata.factor_type == DENSE) { vdata.primal_rel_contrib = vdata.potentials.dot(vdata.factor_beliefs); } else if(vdata.factor_type == XOR){ vdata.primal_rel_contrib = vdata.potentials.dot(vdata.beliefs); } //Find primal residual contribution vec primal_res_contrib = vdata.beliefs - total.neighbor_distribution; for(int i=0;i< vdata.beliefs.size(); i++){ primal_res_contrib[i] = primal_res_contrib[i]*primal_res_contrib[i]; } vdata.primal_res_contrib = primal_res_contrib.sum(); // Compute QP subproblem solution compute_beliefs(vertex, total, vdata.beliefs, vdata.factor_beliefs); //cout< global_vars.old_dual){ global_vars.dual_inc_count ++; } if (total.primal> global_vars.primal_best) { global_vars.primal_best = total.primal; } if(opts.verbose >0) { cout<<"iteration: "< #include #include #include #include #include //#include //#include "utils.h" #include 
"utils.hpp" #include "dd_grlab.hpp" #include "ad3_qp.hpp" ///////////////////////////////////////////////////////////////////////// // Load the UAI file. Each factor as a different vertex void loadUAIfile(graphlab::distributed_control& dc, graph_type& graph, string graph_file, int& nodes) { // Not sure why this is needed dc.barrier(); // Open file ifstream in(graph_file.c_str()); //CHECK(in.good(),"Could not open file: "+graph_file); CHECK(in.good()); // Read type of network string name; in >> name; //CHECK(name.compare("MARKOV")==0, "Only Markov networks are supported. Are you sure this is a typeUAI energy file?"); CHECK(name.compare("MARKOV")==0); // Read size of graph int nnodes, nfactors; in >> nnodes; nodes = nnodes; //CHECK(nnodes>0, "No. of nodes can't be negative. Are you sure this is a typeUAI energy file?"); CHECK(nnodes>0); // Read node cardinalities vector cardinalities(nnodes,0); int cardinality_i, sum_of_cardinalities = 0; for (int i = 0; i != nnodes; ++i) { in >> cardinality_i; cardinalities[i] = cardinality_i; sum_of_cardinalities += cardinality_i; //CHECK(in.good(), "Could not finish reading cardinalities. Are you sure this is a typeUAI energy file?"); CHECK(in.good()); } int vid = 0; if(opts.algorithm != 0){ for(int i = 0; i < nnodes; i++){ //temporary .. put condition vertex_data vdata; vdata.factor_type = VAR; vdata.nvars = 1; vdata.cards.resize(1, cardinalities[i]); vdata.potentials.setZero(cardinalities[i]); vdata.beliefs.setConstant(cardinalities[i], 0.5); graph.add_vertex(vid, vdata); vid++; } } // Read no. 
of factors in >> nfactors; //factor_size.resize(nfactors); factor_id.resize(nfactors); vector factor_size(nfactors,0); //vector factor_id(nfactors,0); vector< vector > factor_memb; factor_memb.resize(nfactors); int temp1, temp2; // Loop and read factor members for (int i=0; i!=nfactors; ++i) { in >> temp1; factor_size[i] = temp1; factor_memb[i].resize(temp1); for (int j=0; j!=temp1; ++j) { in >> temp2; factor_memb[i][j] = temp2; } //CHECK(in.good(), "Could not finish reading cardinalities. Are you sure this is a typeUAI energy file?"); CHECK(in.good()); } if (opts.verbose > 1) cout << "Finished Reading UAI-Preamble:" << " #Nodes = " << nnodes << ", #Factors = "<< nfactors << ", Average Cardinality = " << double(sum_of_cardinalities)/nfactors << "\n"; // Now read factor potentials for (int i=0; i!=nfactors; ++i) { int cardprod; double potential_value; //, energy; in >> cardprod; vertex_data vdata; vdata.nvars = factor_size[i]; if (vdata.nvars > 1) { vdata.degree = vdata.nvars; // Factor degree. vdata.factor_type = DENSE; } else { vdata.degree = 1; // Factor degree. vdata.factor_type = XOR; } vdata.cards.resize(factor_size[i]); vdata.neighbors.resize(factor_size[i]); vector edata(factor_size[i]); vector varid(factor_size[i]); vector card(factor_size[i]); int cardprod2 = 1; for (int j=0; j!=factor_size[i]; ++j) { vdata.cards[j] = cardinalities[factor_memb[i][j]]; vdata.neighbors[j] = factor_memb[i][j]; // afm (check if this was intended!) 
cardprod2 *= vdata.cards[j]; // Also create edge structs here //if (factor_size[i]>1) // { varid[j] = factor_memb[i][j]; card[j] = cardinalities[varid[j]]; edata[j].multiplier_messages.setZero(card[j]); edata[j].local_messages.setZero(card[j]); edata[j].potentials.setZero(card[j]); // } } //CHECK_EQ(cardprod, cardprod2, "Incorrectly sized factor"); CHECK_EQ(cardprod, cardprod2); // Read factor potentials vdata.potentials.resize(cardprod); vdata.beliefs.resize(cardprod); int x_offset = 0; for(int x=0; x< vdata.nvars; x++){ for(int y=0; y> potential_value; //energy = Potential2Energy(potential_value); vdata.potentials[k] = log10(potential_value) ; } //CHECK(in.good(), "Could not finish reading factor tables. Are you sure this is a typeUAI energy file?"); CHECK(in.good()); vdata.potentials.maxCoeff(&vdata.best_configuration); // allocate factors evenly to different machines. if (i%dc.numprocs() != dc.procid()) continue; // If all is well, add vertex and edge graph.add_vertex(vid ,vdata); if (factor_size[i] > 1 || opts.algorithm > 0) // if not a unary, add edges to unaries for (int j=0; j!=factor_size[i]; ++j) graph.add_edge(vid,varid[j],edata[j]); //after adding everything increment vertex id vid++; if (opts.verbose > 1) { cout << "Machine #" << dc.procid() << ", Vertex Id = " << i << " with " << vdata.nvars << " variables."; if (factor_size[i] > 1) { cout << ", Edges = "; for (int j=0; j!=factor_size[i]; ++j) cout << ", (" << i << "," << varid[j] << ")"; } cout << "\n"; cout << "potential: " << vdata.potentials << "\n"; } } // End of reading factors dc.barrier(); } // end of loading UAI file ///////////////////////////////////////////////////////////////////////// // Load the distributed UAI file bool line_parser(graph_type& graph, const std::string& filename, const std::string& textline) { std::stringstream strm(textline); graphlab::vertex_id_type vid; vertex_data vdata; vdata.dual_contrib = 0.0; string type; strm >> type; if(type == "v") { vdata.factor_type = VAR; 
vdata.nvars = 1; vdata.cards.resize(1); strm>>vid; strm >> vdata.cards[0]; vdata.potentials.resize(vdata.cards[0]); //vdata.beliefs.setOnes(vdata.cards[0]); //vdata.beliefs /= vdata.cards[0]; vdata.beliefs.setConstant(vdata.cards[0], 0.5); vdata.unary_degree.resize(vdata.cards[0], 0); //for(int i=0; i< vdata.cards[0]; i++){ // strm>>vdata.potentials[i]; // vdata.potentials[i] = log10(vdata.potentials[i]); // } // vdata.potentials.maxCoeff(&vdata.best_configuration); graph.add_vertex(vid,vdata); } else if(type == "d" || type == "u") { vdata.factor_type = (type=="d")?DENSE:XOR; if(vdata.factor_type == DENSE) strm>>vdata.nvars; else vdata.nvars = 1; strm>>vid; vdata.neighbors.resize(vdata.nvars); vdata.cards.resize(vdata.nvars); int cardprod = 1; int cardsum =0; for(int i=0; i>vdata.neighbors[i]; } for(int i=0; i>vdata.cards[i]; cardprod *=vdata.cards[i]; cardsum +=vdata.cards[i];} vdata.potentials.setZero(cardprod); vdata.beliefs.setOnes(cardprod); vdata.beliefs /=cardsum; //vdata.beliefs.setConstant(cardprod, 0.5); vdata.factor_beliefs.setOnes(cardprod); vdata.factor_beliefs /= cardprod; for(int i=0; i>vdata.potentials[i]; vdata.potentials[i] = log10(vdata.potentials[i]); } vdata.potentials.maxCoeff(&vdata.best_configuration); graph.add_vertex(vid, vdata); edge_data edata; for(int i=0; i>vdata.nvars; strm>>vid; vdata.neighbors.resize(vdata.nvars); vdata.bound_states.resize(vdata.nvars); vdata.cards.resize(vdata.nvars); vdata.beliefs.setZero(vdata.nvars); for(int i=0; i>vdata.neighbors[i]; } for(int i=0; i>vdata.cards[i]; } for(int i=0; i>vdata.bound_states[i]; } strm>>vdata.budget; graph.add_vertex(vid, vdata); edge_data edata; for(int i=0; i degree; vec potentials; void load(graphlab::iarchive& arc) { arc >>degree>>potentials; } void save(graphlab::oarchive& arc) const { arc <, public graphlab::IS_POD_TYPE { edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; gather_potentials gather(icontext_type& 
context, const vertex_type& vertex, edge_type& edge) const { const vertex_data& vdata = vertex.data(); const vertex_type& other_vertex = (edge.source().id() == vertex.id())?edge.target():edge.source(); vector degree; vec potentials; if(vdata.factor_type == VAR) { potentials.resize(vdata.cards[0]); potentials.setZero(); switch(other_vertex.data().factor_type){ case XOR : potentials = other_vertex.data().potentials; case DENSE : degree.resize(vdata.potentials.size(),1); break; case BUDGET : degree.resize(vdata.potentials.size(), 0); int index_neighbor = -1; for(int i=0; i< other_vertex.data().nvars; i++){ if(other_vertex.data().neighbors[i] == vertex.id()){ index_neighbor = i; break; } } degree[other_vertex.data().bound_states[index_neighbor]] = 1; } } else { degree.resize(1); potentials.resize(1); } gather_potentials gather_data; gather_data.degree = degree; gather_data.potentials = potentials; return gather_data; }; void apply(icontext_type& context, vertex_type& vertex, const gather_potentials& total) { vertex_data& vdata = vertex.data(); if(vdata.factor_type == VAR) { vdata.unary_degree = total.degree; vdata.potentials = total.potentials; } }; edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_data& vdata = vertex.data(); const vertex_type& other_vertex = (edge.source().id() == vertex.id())?edge.target():edge.source(); if(vdata.factor_type == VAR) { if(other_vertex.data().factor_type != BUDGET) { for(int i =0; i< vdata.potentials.size(); i++){ edge.data().potentials[i] = vdata.potentials[i]/vdata.unary_degree[i]; } } else if(other_vertex.data().factor_type == BUDGET) { int index_neighbor = -1; edge.data().potentials.setZero(); for(int i=0; i< other_vertex.data().nvars; i++){ if(other_vertex.data().neighbors[i] == vertex.id()){ index_neighbor = i; break;} } int state_index = 
other_vertex.data().bound_states[index_neighbor];
edge.data().potentials[state_index] = vdata.potentials[state_index]/vdata.unary_degree[state_index];
}
}
// NOTE(review): extraction destroyed the text between the stray '<' in the
// commented-out cout below and " engine_type;".  The missing span held the
// end of the scatter()/vertex-program class, the signature of
// run_dd_symmetric(), and the template argument of graphlab::omni_engine.
// Recover it from the repository before compiling.
//cout<<"complete scatter"<
engine_type;
// Instantiate the engine object
engine_type engine(dc, graph, opts.exec_type, clopts);
engine.signal_all();
graphlab::timer timer;
// Attach an aggregator to compute primal/dual objective, with periodic interval specified in cmdline argument.
engine.add_vertex_aggregator("pd_obj",sum, print_obj);
// In debug mode the periodic aggregation is skipped; only the final
// aggregate_now() below runs.
if(!opts.debug){ engine.aggregate_periodic("pd_obj",opts.agg_time); }
//The main command. Run graphlab
engine.start();
engine.aggregate_now("pd_obj");
const double runtime = timer.current_time();
// Report total runtime and update throughput on the distributed console.
dc.cout() << "----------------------------------------------------------"
          << std::endl
          << "Final Runtime (seconds): " << runtime
          << std::endl
          << "Updates executed: " << engine.num_updates() << std::endl
          << "Update Rate (updates/second): "
          << engine.num_updates() / runtime << std::endl;
} /* end of run_dd_symmetric */

// Runs projected dual decomposition: first rewrites vertex/edge data via
// compute_degree and dist_unary_potentials, then runs the engine while
// periodically aggregating the primal/dual objective ("pd_obj").
// NOTE(review): omni_engine's template argument (the vertex program) was
// stripped by extraction here and in the functions below -- confirm against
// the repository.
void run_dd_projected(graphlab::distributed_control& dc, graph_type& graph,
                      std::string exec_type,
                      graphlab::command_line_options clopts){
// Instantiate the engine object
graph.transform_vertices(compute_degree);
graph.transform_edges(dist_unary_potentials);
// Define the engine.
typedef graphlab::omni_engine engine_type;
engine_type engine(dc, graph, opts.exec_type, clopts);
engine.signal_all();
graphlab::timer timer;
// Attach an aggregator to compute primal/dual objective, with periodic interval specified in cmdline argument.
engine.add_vertex_aggregator("pd_obj",sum, print_obj);
if (!opts.debug){ engine.aggregate_periodic("pd_obj", opts.agg_time);}
// The main command. Run graphlab
engine.start();
engine.aggregate_now("pd_obj");
const double runtime = timer.current_time();
// Report total runtime and update throughput on the distributed console.
dc.cout() << "----------------------------------------------------------"
          << std::endl
          << "Final Runtime (seconds): " << runtime
          << std::endl
          << "Updates executed: " << engine.num_updates() << std::endl
          << "Update Rate (updates/second): "
          << engine.num_updates() / runtime << std::endl;
} /* end of run_dd_projected */

// Runs AD3: a first engine pass (distribute_potentials) pushes potentials
// out, then the main engine runs with the same "pd_obj" aggregator as the
// other solvers.
void run_ad3(graphlab::distributed_control& dc, graph_type& graph,
             std::string exec_type,
             graphlab::command_line_options clopts){
// Define the engine.
typedef graphlab::omni_engine transform_engine;
transform_engine distribute_potentials(dc, graph, opts.exec_type, clopts);
distribute_potentials.signal_all();
distribute_potentials.start();
typedef graphlab::omni_engine engine_type;
// Instantiate the engine object
engine_type engine(dc, graph, opts.exec_type, clopts);
engine.signal_all();
graphlab::timer timer;
// Attach an aggregator to compute primal/dual objective, with periodic interval specified in cmdline argument.
engine.add_vertex_aggregator("pd_obj",sum, print_obj);
if(!opts.debug){ engine.aggregate_periodic("pd_obj",opts.agg_time); }
// The main command. Run graphlab
engine.start();
engine.aggregate_now("pd_obj");
const double runtime = timer.current_time();
// Report total runtime and update throughput on the distributed console.
dc.cout() << "----------------------------------------------------------"
          << std::endl
          << "Final Runtime (seconds): " << runtime
          << std::endl
          << "Updates executed: " << engine.num_updates() << std::endl
          << "Update Rate (updates/second): "
          << engine.num_updates() / runtime << std::endl;
} /* end of run_admm */

// Runs Bethe-ADMM; structurally identical to run_ad3 (pre-pass engine to
// distribute potentials, then the main solver engine with the "pd_obj"
// aggregator).
void run_bethe_admm(graphlab::distributed_control& dc, graph_type& graph,
                    std::string exec_type,
                    graphlab::command_line_options clopts){
// Define the engine.
typedef graphlab::omni_engine transform_engine;
transform_engine distribute_potentials(dc, graph, opts.exec_type, clopts);
distribute_potentials.signal_all();
distribute_potentials.start();
typedef graphlab::omni_engine engine_type;
// Instantiate the engine object
engine_type engine(dc, graph, opts.exec_type, clopts);
engine.signal_all();
graphlab::timer timer;
// Attach an aggregator to compute primal/dual objective, with periodic interval specified in cmdline argument.
engine.add_vertex_aggregator("pd_obj",sum, print_obj);
if(!opts.debug){ engine.aggregate_periodic("pd_obj",opts.agg_time); }
// The main command. Run graphlab
engine.start();
engine.aggregate_now("pd_obj");
const double runtime = timer.current_time();
// Report total runtime and update throughput on the distributed console.
dc.cout() << "----------------------------------------------------------"
          << std::endl
          << "Final Runtime (seconds): " << runtime
          << std::endl
          << "Updates executed: " << engine.num_updates() << std::endl
          << "Update Rate (updates/second): "
          << engine.num_updates() / runtime << std::endl;
} /* end of run_bethe_admm */

#endif

================================================
FILE: toolkits/graphical_models/dd_opts.hpp
================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 *
 */

/**
 *
 * \brief This application performs MAP inference on Markov Nets
 * provided in standard UAI file format via Dual-Decomposition.
 *
 *
 * \authors Dhruv Batra, André Martins, Aroma Mahendru
 */

#ifndef __DD_OPTS_HPP__
#define __DD_OPTS_HPP__

// NOTE(review): the header name after this #include was lost during
// extraction (the <...> content was stripped); presumably <string>.
#include

// Factor-type tags stored in vertex_data::factor_type by the loaders.
#define VAR 0
#define XOR 1
#define DENSE 2
#define BUDGET 3

/////////////////////////////////////////////////////////////////////////
// Option Struct
//
// Command-line options shared by the dual-decomposition solvers.  A single
// global instance `opts` (declared extern below) is read throughout the
// application; the defaults are set by the constructor at the bottom.
struct Options {
    // graphlab options
    std::string exec_type;       // engine execution mode; default "sync"
    // input output dirs
    std::string graph_file;      // input model file (no default)
    std::string output_dir;      // results directory; default "./"
    std::string history_file;    // optional history dump; default "\0" (unset)
    std::string file_format;     // input format; default "uai"
    std::string output_file;     // result file name; default "output"
    int verbose;                 // verbosity level; 0 = quiet
    int algorithm;               // solver selector; 0 is the default algorithm
    int maxiter;                 // iteration cap; default 10000
    double dualimprovthres;      // dual-improvement stopping threshold (1e-12)
    double pdgapthres;           // primal-dual gap stopping threshold (1e-1)
    double alpha;                // algorithm parameter; default 1
    double step_size;            // presumably the (sub)gradient step size -- verify; default 1.0
    double agg_time;             // periodic "pd_obj" aggregation interval (1e-4)
    bool debug;                  // when true, periodic aggregation is disabled

    // Default values
    Options():
        exec_type("sync"),
        output_dir("./"),
        history_file("\0"),
        file_format("uai"),
        output_file("output"),
        verbose(0),
        algorithm(0),
        maxiter(10000),
        dualimprovthres(1e-12),
        pdgapthres(1e-1),
        alpha(1),
        step_size(1.0),
        agg_time(1e-4),
        debug(false)
    {}
};
extern Options opts;

#endif

================================================
FILE: toolkits/graphical_models/deprecated/factors/CMakeLists.txt
================================================
project(GraphicalModels)

# add_library(factors STATIC
#   binary_factor.cpp
#   discrete_variable.cpp
#   unary_factor.cpp)

================================================
FILE: toolkits/graphical_models/deprecated/factors/binary_factor.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 *
 */

// NOTE(review): the header name was stripped during extraction; this
// presumably included the binary_factor definition -- verify.
#include

// Stream-insertion operator for graphlab::binary_factor: prints a header
// naming both variables and their arities, then the table of log values,
// one row per assignment of the first variable.
// NOTE(review): this duplicates the inline operator<< in binary_factor.hpp,
// and the header emits a doubled separator ("}, , v_ ") because of the two
// adjacent string literals -- left untouched here.
std::ostream& operator<<(std::ostream& out,
                         const graphlab::binary_factor& fact) {
  out << "Binary Factor(v_" << fact.var1()
      << " in {1..." << fact.arity1() << "}, "
      << ", v_ " << fact.var2()
      << " in {1..." << fact.arity2() << "})" << std::endl;
  for(uint16_t i = 0; i < fact.arity1(); ++i) {
    for(uint16_t j = 0; j < fact.arity2(); ++j) {
      out << fact.logP(i,j) << " ";
    }
    out << std::endl;
  }
  return out;
} // end of operator<<

================================================
FILE: toolkits/graphical_models/deprecated/factors/binary_factor.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 */

#ifndef BINARY_FACTOR_HPP
#define BINARY_FACTOR_HPP

/**
 * This file contains the definitions of some of the basic factor
 * types needed for loopy belief propagation.  This is demo code and
 * is intentionally kept as simple as possible.
 *
 * \author Joseph Gonzalez
 */

// Including Standard Libraries
// NOTE(review): every header name below was stripped during extraction.
#include
#include
#include
#include
#include

// Random number generation
#include
#include

// Include the macro for the for each operation
#include

/**
 * A binary factor is a table over a pair of variables and is
 * associated with each edge in a pairwise markov random field.  All
 * data is represented in log form.
*/ class binary_factor { public: binary_factor(uint32_t var1 = 0, uint16_t arity1 = 0, uint32_t var2 = 0, uint16_t arity2 = 0) : _var1(var1), _arity1(arity1), _var2(var2), _arity2(arity2), _data(arity1 * arity2) { } binary_factor(const binary_factor& other) : _var1(other._var1), _arity1(other._arity1), _var2(other._var2), _arity2(other._arity2), _data(other._data) { } binary_factor& operator=(const binary_factor& other) { _var1 = other._var1; _arity1 = other._arity1; _var2 = other._var2; _arity2 = other._arity2; _data = other._data; return *this; } void resize(uint16_t arity1, uint16_t arity2) { _arity1 = arity1; _arity2 = arity2; _data.resize(_arity1 * _arity2); } uint32_t& var1() { return _var1; } const uint32_t& var1() const { return _var1; } uint32_t& var2() { return _var2; } const uint32_t& var2() const { return _var2; } const uint16_t& arity1() const { return _arity1; } const uint16_t& arity2() const { return _arity2; } /** Get the value of the factor. In var1 == var2 the variables are ignored. 
*/ double& logP(uint32_t x1, uint16_t asg1, uint32_t x2, uint16_t asg2) { // If the factor is not symmetric then we may have to match the // arguments if( _var1 != _var2 ) { assert((x1 == var1() && x2 == var2()) || (x2 == var1() && x1 == var2())); if(x1 == var2() && x2 == var1()) std::swap(asg1, asg2); } assert( asg1 < arity1() ); assert( asg2 < arity2() ); // return value return _data[asg1 + asg2 * arity1()]; } // end of logP for a binary factor const double& logP(uint32_t x1, uint16_t asg1, uint32_t x2, uint16_t asg2) const { // If the factor is not symmetric then we may have to match the // arguments if( _var1 != _var2 ) { assert((x1 == var1() && x2 == var2()) || (x2 == var1() && x1 == var2())); if(x1 == var2() && x2 == var1()) std::swap(asg1, asg2); } ASSERT_LT( asg1 , arity1() ); ASSERT_LT( asg2 , arity2() ); // return value return _data[asg1 + asg2 * arity1()]; } // end of logP for a binary factor double& logP(uint16_t asg1, uint16_t asg2) { ASSERT_LT( asg1 , arity1() ); ASSERT_LT( asg2 , arity2() ); return _data[asg1 + asg2 * arity1()]; } // end of logP for a binary factor const double& logP(uint16_t asg1, uint16_t asg2) const { assert( asg1 < arity1() ); assert( asg2 < arity2() ); return _data[asg1 + asg2 * arity1()]; } // end of logP for a binary factor /** ensure that sum_x this(x) = 1 */ inline void normalize() { assert(arity1() > 0); assert(arity2() > 0); // Compute the max value double max_value = logP(0,0); for(uint16_t asg1 = 0; asg1 < arity1(); ++asg1) for(uint16_t asg2 = 0; asg2 < arity2(); ++asg2) max_value = std::max(max_value, logP(asg1, asg2)); assert( !std::isinf(max_value) ); assert( !std::isnan(max_value) ); // scale and compute normalizing constant double Z = 0.0; for(uint16_t asg1 = 0; asg1 < arity1(); ++asg1) for(uint16_t asg2 = 0; asg2 < arity2(); ++asg2) Z += std::exp(logP(asg1, asg2) -= max_value); assert( !std::isinf(Z) ); assert( !std::isnan(Z) ); assert( Z > 0.0); double logZ = std::log(Z); // Normalize for(uint16_t asg1 = 0; asg1 < 
arity1(); ++asg1) for(uint16_t asg2 = 0; asg2 < arity2(); ++asg2) logP(asg1, asg2) -= logZ; } // End of normalize void set_as_agreement(double lambda) { for(uint16_t i = 0; i < arity1(); ++i) { for(uint16_t j = 0; j < arity2(); ++j) { if( i != j) logP(i,j) = -lambda; else logP(i,j) = 0; } } } // end of set_as_agreement void set_as_laplace(double lambda) { for(uint16_t i = 0; i < arity1(); ++i) { for(uint16_t j = 0; j < arity2(); ++j) { logP(i,j) = -std::abs(double(i) - double(j)) * lambda; } } } // end of set_as_laplace /** * Compute the Mooji Kappen Message derivative. */ double mk_derivative() const { double max_value = -std::numeric_limits::max(); for(uint16_t a = 0; a < arity1(); ++a) { for(uint16_t b = 0; b < arity2(); ++b) { for(uint16_t x = 0; x < arity1(); ++x) { for(uint16_t y = 0; y < arity2(); ++y) { if(a != x && b != y) { double value = (logP(a,b) + logP(x,y) - (logP(x,b) + logP(a,y)))/4.0; value = std::tanh(value); max_value = std::max(max_value, value); } } } } } return max_value; } //! Compute the Ihler dynamic range double ihler_dynamic_range() const { double min_value = *std::min_element(_data.begin(), _data.end()); double max_value = *std::max_element(_data.begin(), _data.end()); return std::exp((max_value - min_value)/2); } //! Print the factor description void printP(std::ostream& out) const { out << "Binary Factor(v_" << var1() << " in {1..." << arity1() << "}, " << ", v_ " << var2() << " in {1..." << arity2() << "})" << std::endl; for(uint16_t i = 0; i < arity1(); ++i) { for(uint16_t j = 0; j < arity2(); ++j) { out << std::exp(logP(i,j)) << " "; } out << std::endl; } } //! Save the factor to a file void save(graphlab::oarchive &oarc) const { oarc << _var1 << _arity1 << _var2 << _arity2 << _data; } //! 
Load the factor from a file void load(graphlab::iarchive &iarc) { iarc >> _var1 >> _arity1 >> _var2 >> _arity2 >> _data; } private: uint32_t _var1; uint16_t _arity1; uint32_t _var2; uint16_t _arity2; std::vector _data; }; // end of class binary_factor inline std::ostream& operator<<(std::ostream& out, const binary_factor& fact) { out << "Binary Factor(v_" << fact.var1() << " in {1..." << fact.arity1() << "}, " << ", v_ " << fact.var2() << " in {1..." << fact.arity2() << "})" << std::endl; for(uint16_t i = 0; i < fact.arity1(); ++i) { for(uint16_t j = 0; j < fact.arity2(); ++j) { out << fact.logP(i,j) << " "; } out << std::endl; } return out; } // end of operator<< #include #endif ================================================ FILE: toolkits/graphical_models/deprecated/factors/discrete_variable.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include std::ostream& operator<<(std::ostream& out, const graphlab::discrete_variable& var) { // return out << "v_" << var.id() // << " in {0:" << var.size()-1 << "}"; return out << var.id(); } ================================================ FILE: toolkits/graphical_models/deprecated/factors/factor_includes.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include "discrete_variable.hpp" #include "unary_factor.hpp" #include "binary_factor.hpp" #include "table_factor.hpp" ================================================ FILE: toolkits/graphical_models/deprecated/factors/factor_test.cxx ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #include #include #include #include #include #include "factors/factor_includes.hpp" using namespace graphlab; class factor_tests : public CxxTest::TestSuite { public: void test_variables() { std::cout << "Test Variables" << std::endl; discrete_variable v1(1, 3); std::cout << v1 << std::endl; discrete_variable v2(2, 4); std::cout << v2 << std::endl; discrete_variable v3(3, 2); std::cout << v3 << std::endl; TS_ASSERT_EQUALS( v1, v1 ); TS_ASSERT_DIFFERS( v1, v2 ); TS_ASSERT_DIFFERS( v1, v3 ); TS_ASSERT_LESS_THAN(v1, v2); TS_ASSERT_LESS_THAN(v2, v3); } void test_domain() { std::cout << "Test domain" << std::endl; const size_t max_dim = 5; typedef discrete_domain domain_type; discrete_variable v1(1, 3); discrete_variable v2(2, 4); discrete_variable v3(3, 2); discrete_variable v4(4, 2); domain_type dom0; domain_type dom1(v1); domain_type dom2(v1,v3); domain_type dom3(v1,v2,v3); std::vector vec; vec.push_back(v3); vec.push_back(v1); vec.push_back(v2); domain_type dom4(vec); TS_ASSERT_EQUALS(dom3, dom4); domain_type dom5 = dom0 + dom2; TS_ASSERT_EQUALS(dom5, dom2); domain_type dom6 = dom2 + dom2; TS_ASSERT_EQUALS(dom6, dom2); domain_type dom7(v3); TS_ASSERT_EQUALS(dom2 - dom1, dom7); TS_ASSERT_EQUALS(dom2 - dom7, dom1); TS_ASSERT_EQUALS(dom3 - v2, dom2); TS_ASSERT_EQUALS(dom2 + dom1 + dom3, dom3); TS_ASSERT_EQUALS(dom2.intersect(dom3), dom2); domain_type dom9(v2, v3); TS_ASSERT_EQUALS(dom2.intersect(dom3), dom2); TS_ASSERT_EQUALS(dom2.intersect(dom7), dom7); TS_ASSERT_EQUALS(dom0.intersect(dom3), dom0); TS_ASSERT_EQUALS(dom2.intersect(dom9), dom7); } void test_assignment() { std::cout << "Test domain: " << std::endl; const size_t max_dim = 5; typedef discrete_domain domain_type; typedef discrete_assignment assignment_type; discrete_variable v1(1,11), v2(2,10), v3(3,8), v4(4,3), v5(5,2); domain_type dom(v1,v2,v3); domain_type sub_dom(v1,v3); size_t i = 0; size_t val[3]; for(size_t j = 0; j < 3; ++j) val[j] = 0; for(assignment_type asg = dom.begin(); asg < dom.end(); 
++asg, ++i) { TS_ASSERT_EQUALS(asg.linear_index(), i); TS_ASSERT_EQUALS(asg.asg_at(0), asg.asg(1)); TS_ASSERT_EQUALS(asg.asg_at(1), asg.asg(2)); TS_ASSERT_EQUALS(asg.asg_at(2), asg.asg(3)); TS_ASSERT_EQUALS(val[0], asg.asg(1)); TS_ASSERT_EQUALS(val[1], asg.asg(2)); TS_ASSERT_EQUALS(val[2], asg.asg(3)); for(size_t j = 0; j < 3; ++j) { if(val[j] < dom.var(j).size() - 1) { val[j]++; break; } else val[j] = 0; } assignment_type rev_asg(dom, asg.linear_index()); TS_ASSERT_EQUALS(rev_asg.asg(1), asg.asg(1)); TS_ASSERT_EQUALS(rev_asg.asg(2), asg.asg(2)); TS_ASSERT_EQUALS(rev_asg.asg(3), asg.asg(3)); TS_ASSERT_EQUALS(rev_asg.linear_index(), asg.linear_index()); assignment_type other_asg = asg.restrict(sub_dom); TS_ASSERT_EQUALS(other_asg.asg(1), asg.asg(1)); TS_ASSERT_EQUALS(other_asg.asg(3), asg.asg(3)); assignment_type asg3(dom); asg3.set_asg(1, asg.asg(1)); asg3.set_asg(2, asg.asg(2)); asg3.set_asg(3, asg.asg(3)); TS_ASSERT_EQUALS(asg3.asg(1), asg.asg(1)); TS_ASSERT_EQUALS(asg3.asg(2), asg.asg(2)); TS_ASSERT_EQUALS(asg3.asg(3), asg.asg(3)); domain_type dom2(v4, v5); for(assignment_type asg2 = dom2.begin(); asg2 < dom2.end(); ++asg2) { assignment_type joint = asg & asg2; TS_ASSERT_EQUALS(joint.asg(4), asg2.asg(4)); TS_ASSERT_EQUALS(joint.asg(5), asg2.asg(5)); } assignment_type asg4(sub_dom); asg4.update(asg); TS_ASSERT_EQUALS(asg4, asg.restrict(sub_dom)); } } void test_table_factor() { std::cout << "Testing factors" << std::endl; const size_t max_dim = 5; typedef table_factor factor_type; typedef factor_type::domain_type domain_type; typedef factor_type::assignment_type assignment_type; discrete_variable v1(1,3), v2(2,2), v3(3,2), v4(4,3), v5(5,2); domain_type dom(v1,v2,v3); // Create a factor over the domain factor_type factor(dom); factor.uniform(); double sum = 0; double sum2 = 0; for(assignment_type asg = dom.begin(); asg < dom.end(); ++asg) { sum += std::exp(factor.logP(asg)); sum2 += std::exp(factor.logP(asg.linear_index())); } TS_ASSERT_EQUALS(sum, sum2); 
TS_ASSERT_LESS_THAN(std::abs(sum - 1), 1E-10); for(assignment_type asg = dom.begin(); asg < dom.end(); ++asg) { double val = double(rand()) / RAND_MAX; factor.logP(asg) = val; TS_ASSERT_EQUALS(factor.logP(asg), factor.logP(asg.linear_index())); } factor /= factor; for(assignment_type asg = dom.begin(); asg < dom.end(); ++asg) { TS_ASSERT_EQUALS(factor.logP(asg), 0); } factor *= factor; for(assignment_type asg = dom.begin(); asg < dom.end(); ++asg) { TS_ASSERT_EQUALS(factor.logP(asg), 0); } for(assignment_type asg = dom.begin(); asg < dom.end(); ++asg) { double val = double(rand()) / RAND_MAX; factor.logP(asg) = val; TS_ASSERT_EQUALS(factor.logP(asg), factor.logP(asg.linear_index())); } factor_type factor2 = factor * factor; factor_type factor3 = factor2 / factor; for(assignment_type asg = dom.begin(); asg < dom.end(); ++asg) { TS_ASSERT_EQUALS(factor2.logP(asg)/2, factor.logP(asg)); TS_ASSERT_EQUALS(factor3.logP(asg), factor.logP(asg)); TS_ASSERT_EQUALS(factor2.logP(asg)/2, factor.logP(asg.linear_index())); TS_ASSERT_EQUALS(factor3.logP(asg), factor.logP(asg.linear_index())); } factor.normalize(); factor_type factor4( domain_type(v1, v3) ); factor4.marginalize(factor); factor_type factor5( domain_type(v1, v3) ); domain_type d2(v2); factor5.zero(); for(assignment_type asg = d2.begin(); asg < d2.end(); ++asg) { factor_type tmp(domain_type(v1,v3)); tmp.condition(factor, asg); for(assignment_type asg2 = tmp.args().begin(); asg2 < tmp.args().end(); ++asg2) { factor5.logP(asg2) += std::exp(tmp.logP(asg2)); } } for(size_t i = 0; i < factor5.size(); ++i) { factor5.logP(i) = std::log(factor5.logP(i)); TS_ASSERT_EQUALS(factor5.logP(i), factor4.logP(i)); } factor.logP(0) +=2; factor.logP(2) +=3; factor.logP(4) +=4; factor.logP(6) +=1; factor_type counts(factor.args()); factor.normalize(); size_t num_samples = 10000000; for(size_t i = 0; i < num_samples; ++i) { assignment_type asg = factor.sample(); ++counts.logP(asg); } sum = 0; for(size_t i = 0; i < counts.size(); ++i) sum 
+= counts.logP(i); for(size_t i = 0; i < counts.size(); ++i) { counts.logP(i) = std::log( counts.logP(i) / sum ); } std::cout << "True Factor: " << factor << std::endl; std::cout << "Sampled: " << counts << std::endl; for(size_t i = 0; i < counts.size(); ++i) { TS_ASSERT_LESS_THAN(std::abs(factor.logP(i) - counts.logP(i)) , 1E-2); } } void test_unary_binary_factors() { unary_factor a(0, 5); unary_factor b(1, 7); binary_factor bin(0,5, 1,7); // initialize factors for(size_t i = 0; i < a.arity(); ++i) a.logP(i) = i; // a.normalize(); for(size_t i = 0; i < a.arity(); ++i) for(size_t j = 0; j < b.arity(); ++j) bin.logP(0, i, 1, j) = i + j + 1; // try some math b.convolve(bin,a); std::cout << a << std::endl; std::cout << b << std::endl; std::cout << bin << std::endl; } void test_bench_marginalize() { // create variables std::vector v(5); for (size_t i = 0;i < 5; ++i) { v[i].id() = i; v[i].size() = 3; } // create base domain discrete_domain<5> alldomain(v); table_factor<5> joint(alldomain); joint.uniform(); // create test marginalization domains std::vector > testfactors; for (size_t i = 0;i < 5; ++i) { for (size_t j = i + 1; j < 5; ++j) { testfactors.push_back(table_factor<5>(discrete_domain<5>(v[i],v[j]))); } } timer ti; ti.start(); const size_t iterations = 10000; for (size_t i = 0;i < iterations; ++i) { for (size_t j = 0; j < testfactors.size(); ++j) { testfactors[j].marginalize(joint); } } std::cout << iterations * testfactors.size() << " marginalize ops of 3^5 --> 3^2 done in " << ti.current_time() << " seconds" << std::endl; } void test_bench_condition() { // create variables std::vector v(5); for (size_t i = 0;i < 5; ++i) { v[i].id() = i; v[i].size() = 3; } // create base domain discrete_domain<5> alldomain(v); table_factor<5> joint(alldomain); joint.uniform(); // create test marginalization assignments std::vector > testasg; std::vector > testfactors; for (size_t i = 0;i < 5; ++i) { for (size_t j = i + 1; j < 5; ++j) { 
testasg.push_back(discrete_assignment<5>(v[i], j % 3,v[j], i % 3)); testfactors.push_back(table_factor<5>(alldomain - testasg.rbegin()->args())); } } timer ti; ti.start(); const size_t iterations = 1000; for (size_t i = 0; i < iterations; ++i) { for (size_t j = 0; j < testasg.size(); ++j) { testfactors[j].condition(joint, testasg[j]); } } std::cout << iterations * testfactors.size() << " condition ops of 3^5 --> 3^3 done in " << ti.current_time() << " seconds" << std::endl; } }; ================================================ FILE: toolkits/graphical_models/deprecated/factors/unary_factor.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include std::ostream& operator<<(std::ostream& out, const graphlab::unary_factor& fact) { out << "Unary Factor(" << fact.arity() << ")" << std::endl; for(size_t i = 0; i < fact.arity(); ++i) { out << fact.logP(i) << " "; } out << std::endl; return out; } // end of operator<< ================================================ FILE: toolkits/graphical_models/deprecated/factors/unary_factor.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef UNARY_FACTOR_HPP #define UNARY_FACTOR_HPP /** * This file contains the definitions of some of the basic factor * types needed for loopy belief propagation. This is demo code and * is intentionally kept as simple as possible. * * \author Joseph Gonzalez */ // Including Standard Libraries #include #include #include #include #include // Random number generation #include #include // Basic unary factor #include "binary_factor.hpp" // Include the macro for the for each operation #include /** * A unary factor is a table over a single variable and is associated * with edge variable in the pairwise markov random field. Unary * factors are also used to represent messages. All data is * represented in log form. */ class unary_factor { public: /** * Construct a unary factor with a given variable and arrity. 
*/ unary_factor(uint32_t var = 0, uint16_t arity = 0) : _var(var), _data(arity) {} // /** // * Copy constructor for a unary factor // */ // unary_factor(const unary_factor& other) : // _var(other._var), _data(other._data) { } // unary_factor& operator=(const unary_factor& other) { // _var = other._var; // _data = other._data; // return *this; // } /** * Increase the size of the factor */ inline void resize(uint16_t arity) { _data.resize(arity); } uint32_t& var() { return _var; } const uint32_t& var() const { return _var; } uint16_t arity() const { return (uint16_t)_data.size(); } size_t size() const { return _data.size(); } bool empty() const { return _data.empty(); } inline double& logP(size_t asg) { assert(asg < arity()); return _data[asg]; } // end of logP for a unary factor inline const double& logP(size_t asg) const { assert(asg < arity()); return _data[asg]; } // end of logP for a unary factor /** zero out the factor */ // inline void zero() { // for(size_t asg = 0; asg < arity(); ++asg) logP(asg) = 0; // } inline void uniform(double value = 0) { for(uint16_t asg = 0; asg < arity(); ++asg) logP(asg) = value; } /** ensure that sum_x this(x) = 1 */ inline void normalize() { assert(arity() > 0); // Compute the max value double max_value = logP(0); for(uint16_t asg = 0; asg < arity(); ++asg) max_value = std::max(max_value, logP(asg)); assert( !std::isinf(max_value) ); assert( !std::isnan(max_value) ); // scale and compute normalizing constant double Z = 0.0; for(uint16_t asg = 0; asg < arity(); ++asg) Z += std::exp(logP(asg) -= max_value); assert( !std::isinf(Z) ); assert( !std::isnan(Z) ); assert( Z > 0.0); double logZ = std::log(Z); // Normalize for(uint16_t asg = 0; asg < arity(); ++asg) logP(asg) -= logZ; } // End of normalize inline unary_factor& operator*=(const unary_factor& other) { ASSERT_EQ(arity(), other.arity()); for(uint16_t asg = 0; asg < arity(); ++asg) logP(asg) += other.logP(asg); return *this; } /** this(x) += other(x); */ inline unary_factor& 
operator+=(const unary_factor& other) { ASSERT_EQ(arity(), other.arity()); for(uint16_t asg = 0; asg < arity(); ++asg) logP(asg) = log(exp(logP(asg)) + exp(other.logP(asg))); return *this; } // end plus /** this(x) /= other(x); */ inline unary_factor& operator/=(const unary_factor& other) { ASSERT_EQ(arity(), other.arity()); for(uint16_t asg = 0; asg < arity(); ++asg) logP(asg) -= other.logP(asg); return *this; } // end of divide /** this(x) = sum_y fact(x,y) * other(y) */ inline void convolve(const binary_factor& bin_fact, const unary_factor& other) { // Compute C(x) = Sum_y A(x,y) B(y) for(uint16_t x = 0; x < arity(); ++x) { double sum = 0.0; for(uint16_t y = 0; y < other.arity(); ++y) { sum += std::exp(bin_fact.logP(var(), x, other.var(), y) + other.logP(y)); } assert( !(sum < 0.0) ); // Gaurd against zeros if(sum == 0) sum = std::numeric_limits::min(); logP(x) = std::log(sum); } } /** this(x) = this(x) * fact(x, asg) */ inline void condition(const binary_factor& bin_fact, uint16_t asg) { uint32_t other_var = var() != bin_fact.var1()? 
bin_fact.var1() : bin_fact.var2(); for(uint16_t x = 0; x < arity(); ++x) logP(x) += bin_fact.logP(var(), x, other_var, asg); } // end of condition /** This = other * damping + this * (1-damping) */ inline void damp(const unary_factor& other, double damping) { assert(arity() == other.arity()); if(damping == 0) return; assert(damping >= 0.0); assert(damping < 1.0); for(uint16_t asg = 0; asg < arity(); ++asg) logP(asg) = std::log(damping * std::exp(other.logP(asg)) + (1.0 - damping) * std::exp(logP(asg))); } // end of damp /** Compute the residual between two unary factors */ inline double residual(const unary_factor& other) const { assert(arity() == other.arity()); double residual = 0; for(uint16_t asg = 0; asg < arity(); ++asg) residual += std::abs(std::exp(logP(asg)) - std::exp(other.logP(asg))); return residual / arity(); } // end of residual /** get the max assignment*/ inline size_t max_asg() const { size_t max_asg = 0; double max_value = logP(0); for(uint16_t asg = 0; asg < arity(); ++asg) { if(logP(asg) > max_value) { max_value = logP(asg); max_asg = asg; } } return max_asg; } // end of max asg /** Get the expected assignment */ inline double expectation() const { double sum = 0; double s2 = 0; for(uint16_t asg = 0; asg < arity(); ++asg) { sum += asg * std::exp(logP(asg)); s2 += std::exp(logP(asg)); } return sum / s2;; } // end of expectation /** Draw a random sample from the factor */ inline size_t sample() const { // Using the cdf method to generate a random sample assert(arity() > 0); // double t = static_cast(rand()) / RAND_MAX; double t = graphlab::random::rand01(); assert( t >= 0); assert(t < 1); double sum = 0.0; for(uint16_t asg = 0; asg < arity(); ++asg) { sum += exp(logP(asg)); if(t <= sum) return asg; assert(sum < 1); } // We were unable to draw a sample; assert(false); } // end of sample void save(graphlab::oarchive &oarc) const { oarc << _var << _data; } void load(graphlab::iarchive &iarc) { iarc >> _var >> _data; } private: uint32_t _var; 
std::vector _data; }; // End of unary factor inline std::ostream& operator<<(std::ostream& out, const unary_factor& fact) { out << "Unary Factor(" << fact.arity() << ")" << std::endl; for(size_t i = 0; i < fact.arity(); ++i) { out << fact.logP(i) << " "; } out << std::endl; return out; } // end of operator<< #include #endif ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/CMakeLists.txt ================================================ project(GraphLab) # add_library(pgibbs STATIC # image.cpp # chromatic_sampler.cpp # util.cpp # factorized_model.cpp # mrf.cpp # junction_tree.cpp # jt_splash_sampler.cpp # pgibbs_tls.cpp # global_variables.cpp # ) # add_library(pgibbs_pic STATIC # image.cpp # chromatic_sampler.cpp # util.cpp # factorized_model.cpp # mrf.cpp # junction_tree.cpp # jt_splash_sampler.cpp # pgibbs_tls.cpp # global_variables.cpp # ) # target_link_libraries(pgibbs_pic # graphlab_pic) # target_link_libraries(pgibbs # graphlab) # get_property(pgibbs_flags TARGET pgibbs PROPERTY COMPILE_FLAGS) # set_target_properties(pgibbs_pic # PROPERTIES COMPILE_FLAGS "${pgibbs_flags} -fPIC") # add_graphlab_executable(make_denoise_alchemy # make_denoise_alchemy.cpp # ) # target_link_libraries(make_denoise_alchemy pgibbs) # add_graphlab_executable(sampler sampler.cpp) # target_link_libraries(sampler pgibbs) ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/README ================================================ The pgibbs folder contains the implementations of the parallel Gibbs samplers described in: Parallel Gibbs Sampling: From Colored Fields to Think Junction Trees by Joseph Gonzalez, Yucheng Low, Arthur Gretton, and Carlos Guestrin Because the sampler.cpp is currently taylored to the Alchemy intermediate factor format we recommend using the precompiled matlab binaries in the matlab folder. 
The matlab interface uses a very simple discrete factorized model
representation.

This project has two main files: sampler.cpp, which defines the main
sampler binary, and make_denoise_alchemy.cpp, which creates a
synthetic denoising problem in Alchemy format:

sampler.cpp: This is the main point of entry for the Gibbs sampler
binary.  To learn more about how to use this run ./sampler --help
The sampler binary operates on alchemy factor-graph files with the
format defined in the "Alchemy Factor Graph Format" section below.

make_denoise_alchemy.cpp: This file is used to create a simple
synthetic image denoising problem.  To learn more about this file run
./make_denoise_alchemy.

Alchemy Factor Graph Format: The Alchemy factor graph format was
developed in collaboration with the Alchemy team
(http://alchemy.cs.washington.edu/) at the University of Washington.
The format describes a discrete factorized model as a text file with
the following form:

variables:
<variable-name>\t<arity>
...
factors:
<var> / <var> / ... / <var> // <value> <value> ...
...

The remaining files are described below:

factorized_model.hpp/cpp: This file is used to read and parse Alchemy
files into GraphLab data structures to represent the factorized
model.  Note this file also contains the key definitions of many of
the various types (vertex_id_t, variable_t, ...) used in the Gibbs
sampler.

mrf.hpp/cpp: This file defines the key data-structures needed to
assemble a GraphLab graph representing a Markov Random Field.  In
addition this file defines routines to construct an MRF from a
factorized model.

global_variable.hpp/cpp: This file defines the few global variables
which are used to access the factors from within the threads (without
needing to make copies) as well as the GraphLab GLShared objects
which are GraphLab managed global variables (formerly part of the
Shared Data Table).

chromatic_sampler.hpp/cpp: This file defines the key parts of the
chromatic sampler algorithm including the update function as well as
a helper routine used to launch the chromatic sampler for a series of
timed experiments.
junction_tree.hpp/cpp: This file defines helper routine to construct junction_trees from a subset of the variables in the MRF. In particular this file contains the extend() routine which is described in the original paper. Finally, this file contains routines to build a GraphLab graph representation of a junction tree to run parallel calibration and sampling on the junction tree. jt_splash_sampler.hpp/cpp: This rather long file contains the bulk of the junction tree Splash sampler. The key parts of which are the jt_splash_sampler object and its dependent jt_worker object. The jt_splash_sampler maintains ncpus jt_workers. Each jt_worker has access to the shared MRF and asynchronously constructs Splashes. In addition the splash_settings object describes the settings for the jt_splash_sampler. pgibbs_tls.hpp/cpp: This file defines thread local storage objects used to reduce memory allocations when running the various sampling algorithms. For questions or comments please contact me at jegonzal@cs.cmu.edu. Thanks, Joey ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/chromatic_sampler.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #include "util.hpp" #include "chromatic_sampler.hpp" #include "run_statistics.hpp" #include "global_variables.hpp" // Include the macro for the foreach operation #include void gibbs_update::operator()(base::icontext_type& context) { mrf_vertex_data& vdata = context.vertex_data(); //TODO: switch to use tls factor_t belief(vdata.variable); belief.uniform(); foreach(const factor_id_t factor_id, vdata.factor_ids) { //const factor_t& factor(SHARED_FACTORS.get()[factor_id]); const factor_t& factor((*SHARED_FACTORS_PTR)[factor_id]); // build the conditional assignment_t conditional_asg = factor.args() - vdata.variable; for(size_t i = 0; i < conditional_asg.num_vars(); ++i) { const mrf_vertex_data& other_vdata = context.const_neighbor_vertex_data(conditional_asg.args().var(i).id()); assert(conditional_asg.args().var(i) == other_vdata.variable); conditional_asg.set_asg_at(i, other_vdata.asg); } belief.times_condition(factor, conditional_asg); } belief.normalize(); size_t new_asg = belief.sample().asg_at(0); vdata.nchanges += (new_asg != vdata.asg); vdata.asg = new_asg; vdata.belief += belief; vdata.nsamples++; } // bool nsamples_terminator(const mrf_gl::ishared_data* shared_data) { // const size_t nsamples = n_samples.get_val(); // bool terminate = nsamples >= MAX_NSAMPLES.get(); // if(terminate) { // // std::cout << "Termination condition reached" << std::endl; // } // return terminate; // } void run_chromatic_sampler(graphlab::core& core, const std::string& chromatic_results_fn, const std::vector& runtimes, const bool draw_images) { // Initialize scheduler core.set_scheduler_type("chromatic"); core.set_scope_type("null"); const size_t ncpus = core.get_options().get_ncpus(); // Use fixed update function gibbs_update ufun; core.schedule_all( ufun ); double total_runtime = 0; double actual_total_runtime = 0; foreach(const double experiment_runtime, runtimes) { total_runtime += experiment_runtime; // get the experiment id size_t experiment_id = 
file_line_count(chromatic_results_fn); std::cout << "Running chromatic sampler experiment " << experiment_id << " for " << experiment_runtime << " seconds." << std::endl; // set the termination time core.engine().set_timeout(experiment_runtime); // Run the engine graphlab::timer timer; timer.start(); core.start(); double actual_experiment_runtime = timer.current_time(); actual_total_runtime += actual_experiment_runtime; /// ================================================================== // Compute final statistics of the mode run_statistics stats(core.graph()); stats.print(); // Save the beliefs save_beliefs(core.graph(), make_filename("chromatic_blfs_", ".tsv", experiment_id)); // // Save the current assignments save_asg(core.graph(), make_filename("chromatic_asg_", ".asg", experiment_id)); // Save the experiment std::ofstream fout(chromatic_results_fn.c_str(), std::ios::app); fout.precision(16); fout << experiment_id << '\t' << total_runtime << '\t' << actual_total_runtime << '\t' << ncpus << '\t' << stats.nsamples << '\t' << stats.nchanges << '\t' << stats.loglik << '\t' << stats.min_samples << '\t' << stats.max_samples << std::endl; fout.close(); // Plot images if desired if(draw_images) draw_mrf(experiment_id, "chromatic", core.graph()); } } // end run_chromatic sampler ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/chromatic_sampler.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. 
See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef CHROMATIC_SAMPLER_HPP #define CHROMATIC_SAMPLER_HPP #include #include "mrf.hpp" class gibbs_update : public graphlab::iupdate_functor { typedef graphlab::iupdate_functor base; void operator()(base::icontext_type& context); }; // end of class gibbs update /** Get the update counts for a vertex */ inline size_t get_nsamples(const mrf_vertex_data& vdata) { return vdata.nsamples; } //! Run the chromatic sampler for a fixed ammount of time void run_chromatic_sampler(graphlab::core& core, const std::string& chromatic_results_fn, const std::vector& runtime, const bool draw_images); #endif ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/factorized_model.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #include "factorized_model.hpp" #include void factorized_model::reserve(const size_t num_factors) { _factors.reserve(num_factors); } factor_t& factorized_model::add_factor(const factor_t& factor) { _factors.push_back(factor); // // normalize the factor // _factors.back().normalize(); factor_id_t factor_id = _factors.size() - 1; for(size_t i = 0; i < factor.num_vars(); ++i) { variable_t var = factor.args().var(i); _variables.insert(var); // add factor to reverse map _var_to_factor[var].insert(factor_id); } return _factors.back(); } factor_t& factorized_model::add_factor(const domain_t& vars) { _factors.push_back(factor_t()); factor_t& factor = _factors.back(); factor.set_args(vars); // // normalize the factor // _factors.back().normalize(); factor_id_t factor_id = _factors.size() - 1; for(size_t i = 0; i < factor.num_vars(); ++i) { variable_t var = factor.args().var(i); _variables.insert(var); // add factor to reverse map _var_to_factor[var].insert(factor_id); } return _factors.back(); } const std::set& factorized_model::factor_ids(const variable_t& var) const { typedef std::map >::const_iterator iterator; iterator iter = _var_to_factor.find(var); ASSERT_TRUE(iter != _var_to_factor.end()); return iter->second; } void factorized_model::load_alchemy(const std::string& filename) { // Open an input file stream std::ifstream fin(filename.c_str()); ASSERT_TRUE(fin.good()); std::string line; size_t line_number = 0; // Read the first line which should be "variable:" const bool success = getline(fin,line,line_number++); ASSERT_TRUE(success); line = trim(line); { const std::string variables_str("variables:"); ASSERT_EQ(line, variables_str); } // Read all the variables and create a map from the variable name // (string) to the variable* prl variable pointer. 
typedef std::map var_map_type; typedef var_map_type::iterator var_map_iter_type; var_map_type var_map; size_t unique_var_id = 0; while(fin.good() && getline(fin, line, line_number++) && trim(line) != "factors:") { // Separate into name and size line = trim(line); ASSERT_GT(line.length(), 0); size_t namelen = line.find_last_of('\t'); size_t varsize = 2; // if their is a '\t' character then the variable size follows it if(namelen != std::string::npos) { std::stringstream istrm(trim(line.substr(namelen))); istrm >> varsize; } // Get the variable name std::string var_name = trim(line.substr(0, namelen)); ASSERT_GT(varsize, 0); // Create a new finite variable in the universe variable_t variable(unique_var_id++, varsize); // Store the variable in the local variable map var_map[var_name] = variable; _var_name.push_back(var_name); ASSERT_EQ(_var_name.size(), unique_var_id); } // Starting to read factors { const std::string factors_string("factors:"); ASSERT_EQ(trim(line), factors_string); } while(fin.good() && getline(fin, line, line_number++)) { /// if the line is empty skip it if(trim(line).length() == 0) continue; // std::cout << "Line: " << line << std::endl; // the factor being read may contain the same variable multiple // times to account for that, we first read a temporary factors, // making every variable unique artificially, and then convert // it to the factor we actually need // Process the arguments size_t end_of_variables = line.find("//")-1; std::vector args; std::set args_set; // Read in all the variables in the factor and store them for(size_t i = 0; i < end_of_variables; i = line.find_first_of('/', i) + 1) { // Read the next variable as a string std::string variable_name = trim(line.substr(i, line.find_first_of('/',i) - i)); // std::cout << "Variable Name: " << variable_name << std::endl; // Look up the variable in the variable map var_map_iter_type iter = var_map.find(variable_name); ASSERT_TRUE(iter != var_map.end()); variable_t var = iter->second; // 
This argument must be unique if(args_set.count(var) > 0) { std::cout << "Line Number: " << line_number << std::endl; ASSERT_EQ(args_set.count(var), 0); } args_set.insert(var); // Save the arguments read from the file args.push_back(var); } // end of first pass through variable // Construct the arguments (which will remap the domain) domain_t domain(args); // std::cout << "domain: " << domain << std::endl; // Build the factor factor_t factor(domain); // Now for the tricky part we need an assignment in the original // order domain_t orig_domain; for(size_t i = 0; i < args.size(); ++i) { orig_domain += variable_t(i, args[i].size()); } // Advance to the correct location in the line std::istringstream tbl_values; size_t weightpos = line.find("///"); if (weightpos == std::string::npos) { tbl_values.str(line.substr(line.find("//") + 2)); } else { size_t startpos = line.find("//") + 2; tbl_values.str(line.substr(startpos, weightpos - startpos)); } // Read in the weights for(assignment_t orig_asg = orig_domain.begin(); orig_asg < orig_domain.end(); ++orig_asg) { assignment_t asg(domain); // Translate the original assignment into the sorted factor assignment for(size_t i = 0; i < domain.num_vars(); ++i) { size_t variable_id = args[i].id(); asg.set_asg(variable_id, orig_asg.asg(i)); } // Read then next value ASSERT_TRUE(tbl_values.good()); double value = 0; tbl_values >> value; // Values are stored in log form factor.logP(asg.linear_index()) = value; } // Save the factor to the factor graph add_factor(factor); } // End of outer while loop over factors should be end of file ASSERT_FALSE(fin.good()); fin.close(); } // end of load alchemy //! Save the factor to a file void factorized_model::save(graphlab::oarchive &arc) const { arc << _variables << _factors << _var_to_factor << _var_name; } //! Load the factor from a file void factorized_model::load(graphlab::iarchive &arc) { arc >> _variables >> _factors >> _var_to_factor >> _var_name; } //! 
save the alchemy file void factorized_model::save_alchemy(const std::string& filename) const { std::ofstream fout(filename.c_str()); ASSERT_TRUE(fout.good()); fout << "variables:" << std::endl; foreach(variable_t var, _variables) { fout << var.id() << '\t' << var.size() << "\n"; } fout << "factors:" << std::endl; foreach(const factor_t& factor, _factors) { domain_t domain = factor.args(); for(size_t i = 0; i < domain.num_vars(); ++i) { fout << domain.var(i).id(); if(i + 1 < domain.num_vars()) fout << " / "; } fout << " // "; for(size_t i = 0; i < factor.size(); ++i) { fout << factor.logP(i); if(i + 1 < factor.size()) fout << ' '; } fout << '\n'; } fout.flush(); fout.close(); } #include ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/factorized_model.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #ifndef PGIBBS_FACTORIZED_MODEL_HPP #define PGIBBS_FACTORIZED_MODEL_HPP /** * * Represents a factorized model in alchemy format * * \author Joseph Gonzalez */ // INCLUDES ===================================================================> #include #include #include #include #include #include #include #include #include // The maximum number of dimensions in a factor table const size_t MAX_TREEWIDTH = 32; const size_t MAX_DIM = MAX_TREEWIDTH + 1; // Basic graphical model typedefs typedef uint32_t factor_id_t; typedef graphlab::discrete_variable variable_t; typedef graphlab::table_factor factor_t; typedef factor_t::domain_type domain_t; typedef factor_t::assignment_type assignment_t; // A class which represents a factorized distribution as a collection // of factors. class factorized_model { private: std::set _variables; std::vector _factors; std::map< variable_t, std::set > _var_to_factor; std::vector _var_name; /** * same as the stl string get line but this also increments a line * counter which is useful for debugging purposes */ inline bool getline(std::ifstream& fin, std::string& line, size_t line_number) { return std::getline(fin, line).good(); } /** * Removes trailing and leading white space from a string */ inline std::string trim(const std::string& str) { std::string::size_type pos1 = str.find_first_not_of(" \t\r"); std::string::size_type pos2 = str.find_last_not_of(" \t\r"); return str.substr(pos1 == std::string::npos ? 0 : pos1, pos2 == std::string::npos ? str.size()-1 : pos2-pos1+1); } public: typedef std::vector factor_map_t; void reserve(const size_t num_factors); //! add a factor to the factorized model factor_t& add_factor(const factor_t& factor); //! 
add a factor to the factorized model factor_t& add_factor(const domain_t& dom); const factor_map_t& factors() const { return _factors; } const std::set& variables() const { return _variables; } const std::set& factor_ids(const variable_t& var) const; const std::string& var_name(size_t id) const { ASSERT_LT(id, _var_name.size()); return _var_name[id]; } void load_alchemy(const std::string& filename); //! Save the factor to a file void save(graphlab::oarchive &arc) const; //! Load the factor from a file void load(graphlab::iarchive &arc); //! save the alchemy file void save_alchemy(const std::string& filename) const; }; //end of factorized model #endif ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/global_variables.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include "global_variables.hpp" // Global Shared Varaibles ====================================================> //graphlab::glshared_const SHARED_FACTORS; const factorized_model::factor_map_t* SHARED_FACTORS_PTR = NULL; graphlab::glshared_const MAX_NSAMPLES; graphlab::glshared n_samples; ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/global_variables.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. 
* All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef PGIBBS_GLOBAL_VARIABLES #define PGIBBS_GLOBAL_VARIABLES #include #include "factorized_model.hpp" // Global Shared Varaibles ====================================================> //extern graphlab::glshared_const SHARED_FACTORS; extern const factorized_model::factor_map_t* SHARED_FACTORS_PTR; extern graphlab::glshared_const MAX_NSAMPLES; extern graphlab::glshared n_samples; #endif ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/image.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #include #include #include #include #include #include #include #include #include #include "image.hpp" #include void image::save_vec(const char* filename) const { std::ofstream os(filename); ASSERT_TRUE(os.good()); for(size_t i = 0; i < pixels(); ++i) { os << pixel(i) << "\n"; } os.flush(); os.close(); } double image::min() const { return *std::min_element(data.begin(), data.end()); } double image::max() const { return *std::max_element(data.begin(), data.end()); } void image::save(graphlab::oarchive &oarc) const { oarc << _rows; oarc << _cols; oarc << data; } void image::load(graphlab::iarchive &iarc) { iarc >> _rows; iarc >> _cols; iarc >> data; } /** Generate a normally distributed random number N(mu, sigma^2) */ // std::pair randn(double mu = 0, double sigma = 1 ); // IMPLEMENTATION =============================================================> void image::resize(size_t rows, size_t cols) { _rows = rows; _cols = cols; data.resize(rows * cols, 0); } /** Get the vertex id of a pixel */ size_t image::vertid(size_t i, size_t j) const { ASSERT_LT(i, _rows); ASSERT_LT(j, _cols); return i * _cols + j; } // static size_t image::vertid(size_t rows, size_t cols, size_t i, size_t j) { // assert(i < rows); // assert(j < cols); // return i * cols + j; // } /** Get the vertex id of a pixel */ std::pair image::loc(size_t vertexid) const { ASSERT_LT(vertexid, _rows * _cols); return std::make_pair( vertexid / _cols, vertexid % _cols); } void image::save(const char* filename) const { std::ofstream os(filename); os << "P2" << std::endl << _cols << " " << _rows << std::endl << 255 << std::endl; // Compute min and max pixel intensities double min = data[0]; double max = data[0]; for(size_t i = 0; i < _rows * _cols; ++i) { min = std::min(min, data[i]); max = std::max(max, data[i]); } // Save the image (rescaled) for(size_t r = 0; r < _rows; ++r) { for(size_t c = 0; c < _cols; c++) { if(min != max) { int color = static_cast(255.0 * (pixel(r,c) - min)/(max-min)); os << color; } 
else { os << min; } if(c != _cols-1) os << "\t"; } os << std::endl; } os.flush(); os.close(); } // end of save void image::paint_sunset(size_t states) { const double center_r = rows() / 2.0; const double center_c = cols() / 2.0; const double max_radius = std::min(rows(), cols()) / 2.0; // Fill out the image for(size_t r = 0; r < rows(); ++r) { for(size_t c = 0; c < cols(); ++c) { double distance = sqrt((r-center_r)*(r-center_r) + (c-center_c)*(c-center_c)); // If on top of image if(r < rows() / 2) { // Compute ring of sunset size_t ring = static_cast(std::floor(std::min(1.0, distance/max_radius) * (states - 1) ) ); pixel(r,c) = ring; } else { size_t blockx = r / 20; size_t blocky = (c + 20 * sin(10.0*r/rows())) / 20; size_t index = blockx + 2*blocky; pixel(r,c) = index % states; } } } } // end of paint_beatiful_sunset void image::paint_checkerboard(size_t states, size_t blocks) { size_t block_size = std::min(rows(), cols() ) / blocks; // Fill out the image for(size_t r = 0; r < rows(); ++r) { for(size_t c = 0; c < cols(); ++c) { size_t blockx = r / block_size; size_t blocky = c / block_size; size_t index = blockx + blocky * block_size; pixel(r,c) = index % states; } } } // end of paint_beatiful_sunset /** corrupt the image with gaussian noise */ void image::gaussian_corrupt(double sigma) { // boost::mt19937 rng; boost::lagged_fibonacci607 rng; boost::normal_distribution noise_model(0, sigma); for(size_t i = 0; i < rows() * cols(); ) { // Corrupt two pixels at a time. 
pixel(i++) += noise_model(rng); } } // end of corrupt_image /** flip_corrupt */ void image::flip_corrupt(size_t states, double prob_flip) { boost::mt19937 rng; boost::uniform_real dist01; for(size_t i = 0; i < rows() * cols(); ++i) { double p = dist01(rng); if(p < prob_flip) pixel(i) = rand() % states; } } // end of corrupt_image // /** generate a normally distributed iid pair */ // std::pair randn(double mu , double sigma ) { // // Generate a N(0,1) from a Unif(0,1) using Box-Muller generator: // double u1 = static_cast(rand()) / RAND_MAX; // double u2 = static_cast(rand()) / RAND_MAX; // double coeff = std::sqrt(-2.0 * std::log(u1)); // double n1 = coeff * std::cos(2.0 * M_PI * u2) ; // double n2 = coeff * std::sin(2.0 * M_PI * u2) ; // // Adjust for mean and variance // n1 = sigma * n1 + mu; // n2 = sigma * n2 + mu; // return std::make_pair(n1, n2); // } // end of randn #include ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/image.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #ifndef PGIBBS_IMAGE_HPP #define PGIBBS_IMAGE_HPP #include #include #include #include /** A simple struct represent a gray scale image */ class image { size_t _rows, _cols; std::vector data; public: /** Create an empty image */ image() : _rows(0), _cols(0), data(0,0) { } /** Create an image of a fixed size */ image(size_t rows, size_t cols) : _rows(rows), _cols(cols), data(rows * cols, 0) { } void resize(size_t rows, size_t cols); /** Get the number of rows */ size_t rows() const { return _rows; } /** Get the number of columns */ size_t cols() const { return _cols; } /** get the number of pixels */ size_t pixels() const { return _rows * _cols; } /** A function to read a pixel */ double& pixel(size_t i, size_t j) { return data[vertid(i,j)]; } double pixel(size_t i, size_t j) const { return data[vertid(i,j)]; } /** Linear indexing */ double& pixel(size_t i) { return data.at(i); } double pixel(size_t i) const { return data.at(i); } /** Get the vertex id of a pixel */ size_t vertid(size_t i, size_t j) const; static size_t vertid(size_t rows, size_t cols, size_t i, size_t j) { ASSERT_LT(i, rows); ASSERT_LT(j, cols); return i * cols + j; } /** Get the pixel address from the vertex id */ std::pair loc(size_t vertex) const; /** A function to save the image to a file in pgm format */ void save(const char* filename) const; void save_vec(const char* filename) const; /** paint a beautiful sunset */ void paint_sunset(size_t states); void paint_checkerboard(size_t states, size_t blocks = 10); /** Add random noise to the image */ void gaussian_corrupt(double sigma); void flip_corrupt(size_t states, double flip_prob); double min() const; double max() const; void save(graphlab::oarchive &oarc) const; void load(graphlab::iarchive &iarc); }; #endif ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/jt_splash_sampler.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. 
* All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include "util.hpp" #include "jt_splash_sampler.hpp" #include "pgibbs_tls.hpp" #include "run_statistics.hpp" #include "global_variables.hpp" #include void run_jtsplash_sampler(mrf_graph_type& mrf_graph, const std::string& jtsplash_results_fn, const std::vector& runtimes, const bool draw_images, const splash_settings& settings) { // size_t ncpus = core.engine().get_ncpus(); // bool affinities = // core.get_engine_options().get_cpu_affinities(); // Initialize the jtsplash sampler jt_splash_sampler jtsplash_sampler(mrf_graph, settings); double total_runtime = 0; double actual_total_runtime = 0; foreach(const double experiment_runtime, runtimes) { total_runtime += experiment_runtime; // get the experiment id size_t experiment_id = file_line_count(jtsplash_results_fn); std::cout << "Running JTSplash sampler experiment " << experiment_id << " for " << experiment_runtime << " seconds." 
<< std::endl; std::cout << "Settings: ======================" << std::endl << "Experiment: " << experiment_id << std::endl << "runtime: " << experiment_runtime << std::endl << "treesize: " << settings.max_tree_size << std::endl << "treewidth: " << settings.max_tree_width << std::endl << "treeheight: " << settings.max_tree_height << std::endl << "factorsize: " << settings.max_factor_size << std::endl << "subthreads: " << settings.subthreads << std::endl << "priorities: " << settings.priorities << std::endl << "vanish: " << settings.vanish_updates << std::endl; graphlab::timer timer; timer.start(); // run the sampler once jtsplash_sampler.sample_seconds(experiment_runtime); double actual_experiment_runtime = timer.current_time(); std::cout << "Actual Experiment Runtime:" << actual_experiment_runtime << std::endl; actual_total_runtime += actual_experiment_runtime; std::cout << "Total Experiment Runtime (actual): " << total_runtime << "(" << actual_total_runtime << ")" << std::endl; // check mrf graph for(size_t i = 0; i < mrf_graph.num_vertices(); ++i) { ASSERT_EQ(mrf_graph.vertex_data(i).tree_info.tree_id, NULL_VID); } /// ================================================================== // Compute final statistics of the mode run_statistics stats(mrf_graph); stats.print(); // Save the beliefs save_beliefs(mrf_graph, make_filename("jtsplash_blfs_", ".tsv", experiment_id)); // // Save the current assignments save_asg(mrf_graph, make_filename("jtsplash_asg_", ".asg", experiment_id)); // Save the experiment std::ofstream fout(jtsplash_results_fn.c_str(), std::ios::app); fout.precision(8); fout << experiment_id << '\t' << total_runtime << '\t' << actual_total_runtime << '\t' << settings.ntrees << '\t' << stats.nsamples << '\t' << stats.nchanges << '\t' << stats.loglik << '\t' << stats.min_samples << '\t' << stats.max_samples << '\t' << settings.max_tree_size << '\t' << settings.max_tree_width << '\t' << settings.max_factor_size << '\t' << settings.max_tree_height << 
'\t' << settings.subthreads << '\t' << settings.priorities << '\t' << jtsplash_sampler.total_trees() << '\t' << jtsplash_sampler.total_collisions() << '\t' << std::endl; fout.close(); // Plot images if desired if(draw_images) draw_mrf(experiment_id, "jtsplash", mrf_graph); } // end of for loop over runtimes } // end of run_jtsplash_sampler void jtree_update::operator()(base::icontext_type& context) { typedef factorized_model::factor_map_t factor_map_t; ASSERT_NE(mrf_ptr, NULL); mrf_graph_type& mrf = *mrf_ptr; // get the vertex data jtree_vertex_data& vdata = context.vertex_data(); // get thread local storage to reduce hit on allocator pgibbs_tls& tls = get_pgibbs_tls(); ////////////////////////////////////////////////////////////////// // Initialize factor // If the factor args have not been set then we need to initialize // the local factor by setting the args and taking the product of // all factors associated with the clique. Some of these factors // may depend on variables not in the clique and are therefore // sliced (conditioned) on the current assignment to those // variables. if(vdata.factor.args() != vdata.variables) { // Resize the factor for the variables in the clique vdata.factor.set_args(vdata.variables); vdata.factor.uniform(); // We now build up the factor by iteratoring over the dependent // factors conditioning if necessary into the conditional_factor // and then multiplying. 
// --- continuation of jtree_update::operator() (clique factor product,
//     message passing, and Rao-Blackwellised sampling) ---
    factor_t& conditional_factor(tls.conditional_factor);
    // Iterate over the factors and multiply each into this factor
    foreach(size_t factor_id, vdata.factor_ids) {
      //const factor_t& factor = SHARED_FACTORS.get()[factor_id];
      const factor_t& factor = (*SHARED_FACTORS_PTR)[factor_id];
      // Build up an assignment for the conditional: every argument of
      // the factor that is outside the clique is fixed to its current
      // MRF assignment.
      domain_t conditional_args = factor.args() - vdata.variables;
      assignment_t conditional_asg;
      for(size_t i = 0; i < conditional_args.num_vars(); ++i) {
        const mrf_vertex_data& mrf_vdata =
          mrf.vertex_data(conditional_args.var(i).id());
        // Conditioned-on variables must not belong to any tree.
        ASSERT_EQ(mrf_vdata.tree_info.tree_id, NULL_VID);
        // ASSERT_FALSE(mrf_vdata.tree_info.in_tree);
        conditional_asg &= assignment_t(mrf_vdata.variable, mrf_vdata.asg);
      }
      // set the factor arguments
      conditional_factor.set_args(factor.args() - conditional_args);
      conditional_factor.condition(factor, conditional_asg);
      // Multiply the conditional factor in
      vdata.factor *= conditional_factor;
    }
    // Extra normalization for stability on the table factors
    vdata.factor.normalize();
    // vdata.belief = vdata.factor;
  }

  //////////////////////////////////////////////////////////////////
  // receive any unreceived messages
  size_t received_neighbors = 0;
  if(!vdata.calibrated) {
    foreach(edge_id_t in_eid, context.in_edge_ids()) {
      jtree_edge_data& in_edata = context.edge_data(in_eid);
      // if the message has been calibrated but not received
      if(in_edata.calibrated && !in_edata.received) {
        // receive message and mark as calibrated
        vdata.factor *= in_edata.message;
        vdata.factor.normalize();
        in_edata.received = true;
      }
      // track total received neighbors
      if(in_edata.received) received_neighbors++;
    } // end of receive all in messages
    // if all messages have been received then set as calibrated
    vdata.calibrated =
      received_neighbors == context.in_edge_ids().size();
  } else {
    received_neighbors = context.in_edge_ids().size();
  }

  //////////////////////////////////////////////////////////////////
  // send any unset messages if we have recieved enough in messages
  // (all but at most one neighbor must have been received)
  if(received_neighbors + 1 >= context.in_edge_ids().size()) {
    factor_t& cavity(tls.cavity);
    foreach(edge_id_t out_eid, context.out_edge_ids()) {
      jtree_edge_data& out_edata = context.edge_data(out_eid);
      edge_id_t rev_eid = context.reverse_edge(out_eid);
      // if the out message is not calibrated try to calibrate it:
      if(!out_edata.calibrated) {
        bool ready_to_send = true;
        // check that all in messages (except the one we want to
        // send) have been recieved
        foreach(edge_id_t in_eid, context.in_edge_ids()) {
          const jtree_edge_data& in_edata = context.const_edge_data(in_eid);
          // if the in edge has not been received and is not from
          // the destination of the out edge then we cannot send
          if(!in_edata.received && rev_eid != in_eid) {
            ready_to_send = false;
            break;
          }
        } // check all neighbors are go for send
        // if we are ready to send then compute message
        if(ready_to_send) {
          cavity = vdata.factor;
          const jtree_edge_data& in_edata = context.const_edge_data(rev_eid);
          // construct cavity if necessary: divide out the message that
          // came in along this edge so it is not counted twice
          if(in_edata.received) {
            cavity /= in_edata.message;
            cavity.normalize();
          }
          // compute actual message
          out_edata.message.set_args(out_edata.variables);
          out_edata.message.marginalize(cavity);
          out_edata.message.normalize();
          out_edata.calibrated = true;
          // schedule the reception of the message
          callback.add_task(context.target(out_eid),
                            jtree_sample_update, 1.0);
        } // end of if ready to send
      } // end of if not calibrated
    } // end of loop over outbound messages
  } // of send all out messages

  //////////////////////////////////////////////////////////////////
  // Construct RB estimate and Sample if calibrated but not yet
  // sampled
  if(vdata.calibrated && !vdata.sampled) {
    // check that the parent is sampled and also determine which
    // variables are going to be sampled at this clique.  This is
    // done by finding the parent assignment if there is one
    assignment_t parent_asg;
    edge_id_t to_parent_eid = NULL_EID;
    // find the parent (at most one out-neighbor may be sampled)
    bool parent_found = false;
    foreach(edge_id_t out_eid, context.out_edge_ids()) {
      const jtree_vertex_data& parent_vdata =
        context.const_neighbor_vertex_data(context.target(out_eid));
      if(parent_vdata.sampled) {
        ASSERT_TRUE(parent_vdata.calibrated);
        ASSERT_FALSE(parent_found);
        parent_found = true;
        to_parent_eid = out_eid;
        const jtree_edge_data& parent_edata =
          context.const_edge_data(to_parent_eid);
        parent_asg = parent_vdata.asg.restrict(parent_edata.variables);
        ASSERT_EQ(parent_asg.args(), parent_edata.variables);
        // break;
      }
    }
    // Determine the remaining variables for which we will need to
    // sample and construct RB estimates
    domain_t unsampled_variables =
      vdata.variables - parent_asg.args();
    vdata.asg = parent_asg;
    // if there are unsampled variables then sample them
    if(unsampled_variables.num_vars() > 0) {
      // First update all the RB estimates for the unsampled
      // variables in the mrf graph
      factor_t& tmp_belief(tls.tmp_belief);
      for(size_t i = 0; i < unsampled_variables.num_vars(); ++i) {
        variable_t var = unsampled_variables.var(i);
        // Construct the RB belief estimate
        tmp_belief.set_args(var);
        tmp_belief.marginalize(vdata.factor);
        tmp_belief.normalize();
        // Update the MRF
        mrf_vertex_data& mrf_vdata = get_mrf_vdata(var.id());
        mrf_vdata.belief += tmp_belief;
      }
      // Condition the belief on the parent assignmnet
      tmp_belief.set_args(unsampled_variables);
      tmp_belief.condition(vdata.factor, parent_asg);
      tmp_belief.normalize();
      // Sample the remaining variables from the belief
      assignment_t sample_asg = tmp_belief.sample();
      // Set the local assignment
      vdata.asg = sample_asg & parent_asg;
      // the assignment should exacty cover the variables
      ASSERT_EQ(vdata.asg.args(), vdata.variables);
      //// Fill out the MRF with the sampled variables
      for(size_t i = 0; i < sample_asg.num_vars(); ++i) {
        variable_t var = sample_asg.args().var(i);
mrf_vertex_data& mrf_vdata = get_mrf_vdata(var.id()); assignment_t local_asg = sample_asg.restrict(var); if(mrf_vdata.asg != local_asg.asg_at(0)) { mrf_vdata.nchanges++; } mrf_vdata.asg = local_asg.asg_at(0); mrf_vdata.nsamples++; // std::cout << graphlab::thread::thread_id() // << ": sampling " << mrf_vdata.variable << std::endl; // remove the vertex from any trees mrf_vdata.tree_info.tree_id = NULL_VID; // mrf_vdata.tree_info.in_tree = false; mrf_vdata.tree_info.height = 0; // double& logP = mrf_vdata.belief.logP(mrf_vdata.asg.asg_at(0)); // logP = std::log( std::exp(logP) + 1.0 ); } } // end of sampling unsampled variables // mark as sampled vdata.sampled = true; // Reschedule unsampled neighbors foreach(edge_id_t out_eid, context.out_edge_ids()) { if(out_eid != to_parent_eid) { const vertex_id_t neighbor_vid = context.target(out_eid); ASSERT_LT(neighbor_vid, context.num_vertices()); callback.add_task(neighbor_vid, jtree_sample_update, 1.0); } } } // End of if(!sampled) sampling procedure } // End of update function termination_condition::termination_condition() : error(false), finish_time_seconds(-1), target_nsamples(0), target_ntrees(0), atomic_nsamples(0), atomic_ntrees(0) { } bool termination_condition::finished() const { return error || (finish_time_seconds > 0 && finish_time_seconds < graphlab::lowres_time_seconds()) || (target_nsamples > 0 && atomic_nsamples.value > target_nsamples) || (target_ntrees > 0 && atomic_ntrees.value > target_ntrees); } void termination_condition::reset() { error = false; finish_time_seconds = -1; target_nsamples = 0; atomic_nsamples.value = 0; target_ntrees = 0; atomic_ntrees.value = 0; } jt_worker::jt_worker(size_t worker_id, const splash_settings& settings, scope_factory_type& scope_factory, const std::vector& root_perm, termination_condition& terminator) : worker_id(worker_id), settings(settings), scope_factory_ptr(&scope_factory), root_index(root_perm.size()), root_perm_ptr(&root_perm), 
current_root(root_perm.at(worker_id)), terminator_ptr(&terminator), ncollisions(0) { // Initialize local jtcore if(settings.subthreads > 1) { jt_core.set_scheduler_type("multiqueue_fifo"); jt_core.set_scope_type("edge"); jt_core.set_ncpus(settings.subthreads); jt_core.set_engine_type("async"); } else { jt_core.set_scheduler_type("fifo"); jt_core.set_scope_type("none"); jt_core.set_ncpus(1); jt_core.set_engine_type("async_sim"); } } // end of init // get a root void jt_worker::run() { // looup until runtime is reached while(!terminator_ptr->finished()) { ///////////////////////////////////////////////////////// // Construct one tree (we must succeed in order to count a tree) advance_root(); // std::cout << "Worker " << worker_id << ": " << current_root << std::endl; // here we loop until the current root is sampled size_t sampled_variables = 0; while(sampled_variables == 0 && !terminator_ptr->finished()) { sampled_variables = splash_once(); // If sample once failed due to collision record a collision event if(sampled_variables == 0) { ncollisions++; // sched_yield(); } } // if variables where sampled in the splash increment the atomic // counters. if(sampled_variables > 0) { terminator_ptr->atomic_nsamples.inc(sampled_variables); terminator_ptr->atomic_ntrees.inc(); } } // std::cout << "N Collisions: " << ncollisions << std::endl; } // end of run void jt_worker::advance_root() { root_index += settings.ntrees; if(root_index >= root_perm_ptr->size()) root_index = worker_id; current_root = root_perm_ptr->at(root_index); } /////////////////////////////////////////////////////////////////////// /// Markov Blanket Locking Helper functions /** * See if the vertex can be grabbed into this workers tree. If true we * still need to actually grab the vertex (which could still * fail). However if the vertex is currently unavailable we could save * time by not even trying (although it may become available later). 
*/ bool jt_worker::is_vertex_available(vertex_id_t vid) { ASSERT_NE(scope_factory_ptr, NULL); const mrf_graph_type& mrf(scope_factory_ptr->get_graph()); const mrf_vertex_data& vdata = mrf.vertex_data(vid); // Check that this vertex is not already in a tree bool in_tree = vdata.tree_info.tree_id != NULL_VID; if(in_tree) return false; // check that the neighbors are not in any other trees than this // one const mrf_gl::edge_list& in_eids = mrf.in_edge_ids(vid); foreach(edge_id_t in_eid, in_eids) { vertex_id_t neighbor_vid = mrf.source(in_eid); const mrf_vertex_data& vdata = mrf.vertex_data(neighbor_vid); bool in_tree = vdata.tree_info.tree_id != NULL_VID; // if the neighbor is in a tree other than this one quit if(in_tree && worker_id != vdata.tree_info.tree_id) return false; } return true; } // end of try grab vertex /** * Grab this vertex into the tree owned by worker id. If this returns * true than the vertex is marked as grabbed. This must be called * within the context of an edge scope. 
*/ bool jt_worker::try_grab_vertex(iscope_type& scope) { // Check that this vertex is not already in a tree bool in_tree = scope.vertex_data().tree_info.tree_id != NULL_VID; if(in_tree) return false; // check that the neighbors are not in any other trees than this // one vertex_id_t min_height(std::numeric_limits::max()); foreach(edge_id_t in_eid, scope.in_edge_ids()) { vertex_id_t neighbor_vid = scope.source(in_eid); const mrf_vertex_data& vdata = scope.const_neighbor_vertex_data(neighbor_vid); bool in_tree = vdata.tree_info.tree_id != NULL_VID; // if the neighbor is in a tree other than this one quit if(in_tree && worker_id != vdata.tree_info.tree_id) return false; if(in_tree) min_height = std::min(min_height, vdata.tree_info.height); } // Assert that this vertex is not in a tree and that none of the // neighbors are in other trees This vertex does not neighbor any // other trees than this one scope.vertex_data().tree_info.tree_id = worker_id; // scope.vertex_data().tree_info.in_tree = true; scope.vertex_data().tree_info.height = min_height + 1; return true; } // end of try grab vertex /** * Release the vertex */ void jt_worker::release_vertex(iscope_type& scope) { // This vertex does not neighbor any other trees than this one scope.vertex_data().tree_info.tree_id = NULL_VID; // scope.vertex_data().tree_info.in_tree = false; scope.vertex_data().tree_info.height = 0; } // release the vertex /////////////////////////////////////////////////////////////////////// /// Scoring helper functions /** * This function returns the priority of a particular vertex. 
 */
double jt_worker::score_vertex(vertex_id_t vid) {
  ASSERT_NE(scope_factory_ptr, NULL);
  mrf_graph_type& mrf(scope_factory_ptr->get_graph());
  mrf_vertex_data& vdata = mrf.vertex_data(vid);
  // Recompute the cached priority only while the vertex is "young"
  // (fewer than vanish_updates samples) or when it was invalidated
  // (negative); otherwise return the cached value.
  if (vdata.nsamples < settings.vanish_updates ||
      vdata.tree_info.priority < 0) {
    vdata.tree_info.priority = score_vertex_log_odds(vid);
  }
  return vdata.tree_info.priority;
}

/**
 * Score a vertex by the L1 distance between its clique factor and the
 * product of the clique's single-variable marginals.  Returns -1 when
 * adding the vertex would exceed the tree width or factor size limits.
 */
double jt_worker::score_vertex_l1_diff(vertex_id_t vid) {
  // Get the scope factory
  ASSERT_NE(scope_factory_ptr, NULL);
  const mrf_graph_type& mrf(scope_factory_ptr->get_graph());
  const mrf_vertex_data& vdata = mrf.vertex_data(vid);
  // Construct the domain of neighbors that are already in the tree
  domain_t vars = vdata.variable;
  foreach(edge_id_t ineid, mrf.in_edge_ids(vid)) {
    const vertex_id_t neighbor_vid = mrf.source(ineid);
    const mrf_vertex_data& neighbor = mrf.vertex_data(neighbor_vid);
    // test to see if the neighbor is in the tree by checking the
    // elimination time map
    if(jt_list.contains(neighbor_vid)) {
      vars += neighbor.variable;
      // If this vertex has too many tree neighbor than the priority
      // is set to -1;
      if(vars.num_vars() > settings.max_tree_width + 1) return -1;
      if(vars.size() > settings.max_factor_size) return -1;
    }
  }
  // Compute the clique factor
  clique_factor.set_args(vars);
  clique_factor.uniform();
  // get all the factors
  // const factorized_model::factor_map_t& factors(SHARED_FACTORS.get());
  const factorized_model::factor_map_t& factors(*SHARED_FACTORS_PTR);
  // Iterate over the factors and multiply each into this factor
  foreach(size_t factor_id, vdata.factor_ids) {
    const factor_t& factor = factors[factor_id];
    // Build up an assignment for the conditional
    domain_t conditional_args = factor.args() - vars;
    if(conditional_args.num_vars() > 0) {
      assignment_t conditional_asg;
      for(size_t i = 0; i < conditional_args.num_vars(); ++i) {
        const mrf_vertex_data& neighbor_vdata =
          mrf.vertex_data(conditional_args.var(i).id());
        conditional_asg &=
          assignment_t(neighbor_vdata.variable, neighbor_vdata.asg);
      }
      // set the factor arguments
      conditional_factor.set_args(factor.args() - conditional_args);
      conditional_factor.condition(factor, conditional_asg);
      // Multiply the conditional factor in
      clique_factor *= conditional_factor;
      // clique_factor.normalize();
    } else {
      clique_factor *= factor;
    }
  } // end of loop over factors
  clique_factor.normalize();
  // Compute the product of marginals
  product_of_marginals_factor.set_args(vars);
  product_of_marginals_factor.uniform();
  for(size_t i = 0; i < vars.num_vars(); ++i) {
    marginal_factor.set_args(vars.var(i));
    marginal_factor.marginalize(clique_factor);
    marginal_factor.normalize();
    product_of_marginals_factor *= marginal_factor;
  }
  product_of_marginals_factor.normalize();
  // Compute the residual
  double residual = clique_factor.l1_diff(product_of_marginals_factor);
  ASSERT_GE( residual, 0);
  ASSERT_FALSE( std::isnan(residual) );
  ASSERT_TRUE( std::isfinite(residual) );
  // ensure score is bounded
  // residual = std::tanh(residual);
  return residual;
} // end of score l1 diff

/**
 * Score a vertex by the L1 distance between its conditional (given the
 * current assignment) and its marginal -- a log-odds style residual.
 */
double jt_worker::score_vertex_log_odds(vertex_id_t vid) {
  // Get the scope factory
  const mrf_graph_type& mrf(scope_factory_ptr->get_graph());
  const mrf_vertex_data& vdata(mrf.vertex_data(vid));
  // Construct the domain of neighbors that are already in the tree
  domain_t vars = vdata.variable;
  foreach(edge_id_t ineid, mrf.in_edge_ids(vid)) {
    const vertex_id_t neighbor_vid = mrf.source(ineid);
    const mrf_vertex_data& neighbor = mrf.vertex_data(neighbor_vid);
    // test to see if the neighbor is in the tree by checking the
    // elimination time map
    if(jt_list.contains(neighbor_vid)) {
      vars += neighbor.variable;
      // If this vertex has too many tree neighbor than the priority
      // is set to 0;
      if(vars.num_vars() > settings.max_tree_width + 1) return -1;
      if(vars.size() > settings.max_factor_size) return -1;
    }
  }
  ASSERT_EQ(vars.num_vars(), 2);
  // Compute the clique factor
  clique_factor.set_args(vars);
  clique_factor.uniform();
  // get all the factors
  // const factorized_model::factor_map_t&
  // factors(SHARED_FACTORS.get());
  const factorized_model::factor_map_t& factors(*SHARED_FACTORS_PTR);
  // Iterate over the factors and multiply each into this factor
  foreach(size_t factor_id, vdata.factor_ids) {
    const factor_t& factor = factors[factor_id];
    // Build up an assignment for the conditional
    domain_t conditional_args = factor.args() - vars;
    if(conditional_args.num_vars() > 0) {
      assignment_t conditional_asg;
      for(size_t i = 0; i < conditional_args.num_vars(); ++i) {
        const mrf_vertex_data& neighbor_vdata =
          mrf.vertex_data(conditional_args.var(i).id());
        conditional_asg &=
          assignment_t(neighbor_vdata.variable, neighbor_vdata.asg);
      }
      // set the factor arguments
      conditional_factor.set_args(factor.args() - conditional_args);
      conditional_factor.condition(factor, conditional_asg);
      // Multiply the conditional factor in
      clique_factor *= conditional_factor;
      // clique_factor.normalize();
    } else {
      clique_factor *= factor;
    }
  } // end of loop over factors
  // Compute the conditional factor and marginal factors
  conditional_factor.set_args(vars - vdata.variable);
  conditional_factor.condition(clique_factor,
                               assignment_t(vdata.variable, vdata.asg));
  marginal_factor.set_args(vars - vdata.variable);
  marginal_factor.marginalize(clique_factor);
  // Compute metric
  conditional_factor.normalize();
  marginal_factor.normalize();
  // double residual = conditional_factor.l1_logdiff(marginal_factor);
  double residual = conditional_factor.l1_diff(marginal_factor);
  // rescale by updates
  // residual = residual / (vdata.updates + 1);
  ASSERT_GE( residual, 0);
  ASSERT_FALSE( std::isnan(residual) );
  ASSERT_TRUE( std::isfinite(residual) );
  // ensure score is bounded
  // residual = std::tanh(residual);
  return residual;
} // end of score vertex

/**
 * Score a vertex by how unlikely its current assignment is under its
 * marginal: 1 - P(current assignment).
 */
double jt_worker::score_vertex_lik(vertex_id_t vid) {
  // Get the scope factory
  const mrf_graph_type& mrf(scope_factory_ptr->get_graph());
  const mrf_vertex_data& vdata(mrf.vertex_data(vid));
  // Construct the domain of neighbors that are already in the tree
  domain_t vars = vdata.variable;
  foreach(edge_id_t ineid, mrf.in_edge_ids(vid)) {
    const vertex_id_t neighbor_vid = mrf.source(ineid);
    const mrf_vertex_data& neighbor = mrf.vertex_data(neighbor_vid);
    // test to see if the neighbor is in the tree by checking the
    // elimination time map
    if(jt_list.contains(neighbor_vid)) {
      vars += neighbor.variable;
      // If this vertex has too many tree neighbor than the priority
      // is set to 0;
      if(vars.num_vars() > settings.max_tree_width + 1) return -1;
      if(vars.size() > settings.max_factor_size) return -1;
    }
  }
  // Compute the clique factor
  clique_factor.set_args(vars);
  clique_factor.uniform();
  // get all the factors
  // const factorized_model::factor_map_t& factors(SHARED_FACTORS.get());
  const factorized_model::factor_map_t& factors(*SHARED_FACTORS_PTR);
  // Iterate over the factors and multiply each into this factor
  foreach(size_t factor_id, vdata.factor_ids) {
    const factor_t& factor = factors[factor_id];
    // Build up an assignment for the conditional
    domain_t conditional_args = factor.args() - vars;
    if(conditional_args.num_vars() > 0) {
      assignment_t conditional_asg;
      for(size_t i = 0; i < conditional_args.num_vars(); ++i) {
        const mrf_vertex_data& neighbor_vdata =
          mrf.vertex_data(conditional_args.var(i).id());
        conditional_asg &=
          assignment_t(neighbor_vdata.variable, neighbor_vdata.asg);
      }
      // set the factor arguments
      conditional_factor.set_args(factor.args() - conditional_args);
      conditional_factor.condition(factor, conditional_asg);
      // Multiply the conditional factor in
      clique_factor *= conditional_factor;
      // clique_factor.normalize();
    } else {
      clique_factor *= factor;
    }
  } // end of loop over factors
  // Compute the conditional factor and marginal factors
  marginal_factor.set_args(vdata.variable);
  marginal_factor.marginalize(clique_factor);
  marginal_factor.normalize();
  double residual = 1.0 - exp(marginal_factor.logP(vdata.asg));
  ASSERT_GE( residual, 0);
  ASSERT_FALSE( std::isnan(residual) );
  ASSERT_TRUE( std::isfinite(residual) );
  // // ensure score
  // is bounded
  // residual = std::tanh(residual);
  return residual;
} // end of max lik

///////////////////////////////////////////////////////////////////////
/// Tree Growing helper functions

/**
 * Grow a junction tree from current_root by breadth-first search,
 * grabbing vertices into this worker's tree until the queue empties or
 * max_tree_size cliques are collected.
 */
void jt_worker::grow_bfs_jtree() {
  ASSERT_NE(scope_factory_ptr, NULL);
  // Get the scope factory
  mrf_graph_type& mrf = scope_factory_ptr->get_graph();
  // Clear local data structures
  jt_list.clear();
  bfs_queue.clear();
  visited.clear();
  // add the root
  bfs_queue.push_back(current_root);
  visited.insert(current_root);
  while(!bfs_queue.empty() &&
        jt_list.cliques.size() < settings.max_tree_size) {
    // Take the top element
    const vertex_id_t next_vertex = bfs_queue.front();
    bfs_queue.pop_front();
    // pretest that the vertex is available before trying to get it
    if(!is_vertex_available(next_vertex)) continue;
    // Maybe we can get the vertex so actually try to get it by first
    // grabbing the lock (scope) for the vertex
    iscope_type* scope_ptr =
      scope_factory_ptr->get_edge_scope(worker_id, next_vertex);
    ASSERT_NE(scope_ptr, NULL);
    iscope_type& scope(*scope_ptr);
    // See if we can get the vertex for this tree
    if(!try_grab_vertex(scope)) {
      // release the scope and move on
      scope_factory_ptr->release_scope(&scope);
      continue;
    }
    // Assert that we own the vertex at this point
    ASSERT_EQ(scope.vertex_data().tree_info.tree_id, worker_id);
    // Determine if this is the root (it is the root if there are no
    // cliques in the junction tree).
    bool is_root = jt_list.cliques.empty();
    // Set the height of the root to be zero explicity
    if(is_root) scope.vertex_data().tree_info.height = 0;
    // Check if it is safe to extend to the tree to include next variable
    bool extended_jtree =
      scope.vertex_data().tree_info.height < settings.max_tree_height &&
      jt_list.extend(next_vertex, mrf,
                     settings.max_tree_width,
                     settings.max_factor_size);
    // If we were unable to extend the tree then release the vertex
    if(!extended_jtree) {
      release_vertex(scope);
      scope_factory_ptr->release_scope(&scope);
      continue;
    }
    // add the neighbors to the search queue
    foreach(edge_id_t eid, mrf.out_edge_ids(next_vertex)) {
      vertex_id_t neighbor_vid = mrf.target(eid);
      if(visited.count(neighbor_vid) == 0) {
        bfs_queue.push_back(neighbor_vid);
        visited.insert(neighbor_vid);
      }
    }
    // Release the scope and let neighbors start to run
    scope_factory_ptr->release_scope(&scope);
  } // end of while loop
} // end grow_bfs_jtree

/**
 * Grow a junction tree from current_root using a priority queue
 * ordered by vertex score (continues below).
 */
void jt_worker::grow_prioritized_jtree() {
  ASSERT_NE(scope_factory_ptr, NULL);
  // Get the scope factory
  mrf_graph_type& mrf = scope_factory_ptr->get_graph();
  // Clear local data structures
  jt_list.clear();
  priority_queue.clear();
  visited.clear();
  // add the root
  priority_queue.push(current_root, 1.0);
  visited.insert(current_root);
  while(!priority_queue.empty() &&
        jt_list.cliques.size() < settings.max_tree_size) {
    // Take the top element
    const vertex_id_t next_vertex = priority_queue.pop().first;
    // pretest that the vertex is available before trying to get it
    if(!is_vertex_available(next_vertex)) continue;
    // Maybe we can get the vertex so actually try to get it by first
    // grabbing the lock (scope) for the vertex
    iscope_type* scope_ptr =
      scope_factory_ptr->get_edge_scope(worker_id, next_vertex);
    ASSERT_NE(scope_ptr, NULL);
    iscope_type& scope(*scope_ptr);
    // See if we can get the vertex for this tree
    if(!try_grab_vertex(scope)) {
      // release the scope and move on
      scope_factory_ptr->release_scope(&scope);
      continue;
    }
    // Assert that we own the
vertex at this point ASSERT_EQ(scope.vertex_data().tree_info.tree_id, worker_id); // Determine if this is the root (it is the root if there are no // cliques in the junction tree). bool is_root = jt_list.cliques.empty(); // Set the height of the root to be zero explicity if(is_root) scope.vertex_data().tree_info.height = 0; // test the bool extended_jtree = scope.vertex_data().tree_info.height < settings.max_tree_height && jt_list.extend(next_vertex, mrf, settings.max_tree_width, settings.max_factor_size); // If we were unable to extend the tree then release the vertex if(!extended_jtree) { release_vertex(scope); scope_factory_ptr->release_scope(&scope); continue; } // If the tree was extended, extend the boundary by adding the // neighbors of the newly added vertex to the tree // add the neighbors to the search queue or update their priority foreach(edge_id_t eid, mrf.out_edge_ids(next_vertex)) { vertex_id_t neighbor_vid = mrf.target(eid); if(visited.count(neighbor_vid) == 0) { visited.insert(neighbor_vid); // Vertex has not yet been visited double score = score_vertex(neighbor_vid); // if the score is greater than zero then add the neighbor // to the priority queue. 
The score is zero if there is no // advantage or the treewidth is already too large if(score >= 0) priority_queue.push(neighbor_vid, score); } else if(priority_queue.contains(neighbor_vid)) { // vertex is still in queue we may need to recompute // score double score = score_vertex(neighbor_vid); if(score >= 0) { // update the priority queue with the new score priority_queue.update(neighbor_vid, score); } else { // The score computation revealed that the clique would be // too large so simply remove the vertex from the priority // queue priority_queue.remove(neighbor_vid); } } // otherwise the vertex has been visited and processed } // end of foreach // Release the scope and let neighbors start to run scope_factory_ptr->release_scope(&scope); } // end of while loop } // end grow_prioritized_jtree size_t jt_worker::splash_once() { if(settings.priorities) { // grow the prioritized junction tree data structure grow_prioritized_jtree(); } else { // grow the bfs junction tree data structure grow_bfs_jtree(); } ASSERT_NE(scope_factory_ptr, NULL); // Get the scope factory mrf_graph_type& mrf = scope_factory_ptr->get_graph(); // If we failed to build a tree return failure if(jt_list.cliques.empty()) return 0; // std::cout << "Varcount: " << jt_list.cliques.size() << std::endl; // /////////////////////////////////// // // plot the graph // if(worker_id == 0) { // std::cout << "Saving treeImage:" << std::endl; // size_t rows = std::sqrt(mrf.num_vertices()); // image img(rows, rows); // for(vertex_id_t vid = 0; vid < mrf.num_vertices(); ++vid) { // vertex_id_t tree_id = mrf.vertex_data(vid).tree_id; // img.pixel(vid) = // tree_id == vertex_id_t(-1)? 
0 : tree_id + worker_count; // } // img.save(make_filename("tree", ".pgm", tree_count).c_str()); // } // Build the junction tree and sample jt_core.graph().clear(); size_t num_factors = (*SHARED_FACTORS_PTR).size(); // jt_list.validate(); jt_list.load_graph(mrf, num_factors, jt_core.graph()); // Rebuild the engine (clear the old scheduler) jt_core.rebuild_engine(); // add tasks to all vertices jt_core.add_task_to_all(jtree_sample_update, 1.0); // Run the core jt_core.start(); // Check that the junction tree is sampled size_t actual_tree_width = 0; for(vertex_id_t vid = 0; vid < jt_core.graph().num_vertices(); ++vid) { const jtree_vertex_data& vdata = jt_core.graph().vertex_data(vid); ASSERT_TRUE(vdata.sampled); ASSERT_TRUE(vdata.calibrated); ASSERT_GT(vdata.variables.num_vars(), 0); actual_tree_width = std::max(vdata.variables.num_vars() - 1, actual_tree_width); } // std::cout << "Treewidth: " << actual_tree_width << std::endl; // Return the number of variables in the tree return jt_list.elim_time.size(); } // end of sample once jt_splash_sampler:: jt_splash_sampler(mrf_graph_type& mrf, const splash_settings& settings) : workers(settings.ntrees, NULL), scope_factory(mrf, settings.ntrees, graphlab::scope_range::EDGE_CONSISTENCY), root_perm(mrf.num_vertices()) { ASSERT_LE(settings.ntrees, mrf.num_vertices()); // Set the shared graph pointer shared_mrf_ptr = &mrf; // Shuffle the root ordering for(vertex_id_t vid = 0; vid < mrf.num_vertices(); ++vid) root_perm[vid] = vid; graphlab::random::shuffle(root_perm.begin(), root_perm.end()); // initialize the worker thread objects for(size_t i = 0; i < workers.size(); ++i) { workers[i] = new jt_worker(i, settings, scope_factory, root_perm, terminator); } } // end of constructor jt_splash_sampler::~jt_splash_sampler() { for(size_t i = 0; i < workers.size(); ++i) { if(workers[i] != NULL) { delete workers[i]; workers[i] = NULL; } } } size_t jt_splash_sampler::total_collisions() const { size_t total_collisions = 0; foreach(const 
jt_worker* worker, workers) { ASSERT_NE(worker, NULL); total_collisions += worker->ncollisions; } return total_collisions; } size_t jt_splash_sampler::total_trees() const { return terminator.atomic_ntrees.value; } size_t jt_splash_sampler::total_samples() const { return terminator.atomic_nsamples.value; } void jt_splash_sampler::sample_seconds(float runtime_secs) { // Set the termination condition terminator.reset(); terminator.finish_time_seconds = graphlab::lowres_time_seconds() + runtime_secs; run(); } void jt_splash_sampler::sample_trees(size_t total_trees) { // Set the termination condition terminator.reset(); terminator.target_ntrees = total_trees; run(); } void jt_splash_sampler::sample_updates(size_t total_updates) { // Set the termination condition terminator.reset(); terminator.target_nsamples = total_updates; run(); } void jt_splash_sampler::run() { // create worker threads graphlab::thread_group threads; if(workers.size() == 1) { ASSERT_NE(workers[0], NULL); workers[0]->run(); } else { // Launch the threads for(size_t i = 0; i < workers.size(); ++i) { ASSERT_NE(workers[i], NULL); // if(use_cpu_affinity) // threads.launch(boost::bind(&jt_worker::run, &(workers[i])), i); // else threads.launch(boost::bind(&jt_worker::run, workers[i])); } const char* exception_message = "Exception!"; // Wait for all threads to finish while (threads.running_threads() > 0) { try { threads.join(); } catch(const char* error) { logstream(LOG_ERROR) << "Exception Caught:\n\t" << error << std::endl; exception_message = error; // killall the running threads terminator.error = true; } } if(terminator.error) { throw exception_message; } } } ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/jt_splash_sampler.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 */

#ifndef PGIBBS_JT_SPLASH_SAMPLER_HPP
#define PGIBBS_JT_SPLASH_SAMPLER_HPP

// NOTE(review): the targets of the bare #include directives below (and
// several template argument lists later in this file, e.g.
// std::numeric_limits::max()) appear to have been stripped by text
// extraction -- recover them from the upstream repository.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
// Including Standard Libraries
#include
#include
#include

#include "factorized_model.hpp"
#include "mrf.hpp"
#include "junction_tree.hpp"

/**
 * The settings for the jt_splash_sampler.  Originally these formed a
 * long list of arguments but since the order can easily introduce
 * bugs we switched to a struct.
 */
struct splash_settings {
  size_t ntrees;           // number of concurrent trees / worker threads
  size_t max_tree_size;    // maximum number of cliques per tree
  size_t max_tree_height;
  size_t max_tree_width;   // treewidth bound for tree growing
  size_t max_factor_size;  // bound on clique factor table size (0 = unbounded)
  bool priorities;         // use prioritized rather than BFS tree growth
  size_t vanish_updates;
  size_t subthreads;
  splash_settings() :
    ntrees(2),
    max_tree_size(std::numeric_limits::max()),
    max_tree_height(std::numeric_limits::max()),
    max_tree_width(2),
    max_factor_size(std::numeric_limits::max()),
    priorities(false),
    vanish_updates(10),
    subthreads(1) { }
};

/**
 * Run the jtsplash sampler
 */
void run_jtsplash_sampler(mrf_graph_type& mrf_graph,
                          const std::string& jtsplash_results_fn,
                          const std::vector& runtimes,
                          const bool draw_images,
                          const splash_settings& settings);

/**
 * This fairly complex update function assembles the clique factors by
 * conditioning on variables not in the tree.  Then it computes
 * messages at each clique to calibrate the junction tree.  Finally,
 * using the messages and the conditioned parents, it samples each
 * clique constructing new assignments to each variable.
 */
class jtree_update : public graphlab::iupdate_functor {
public:
  typedef graphlab::iupdate_functor base;
  jtree_update(mrf_graph_type* mrf_ptr = NULL) : mrf_ptr(mrf_ptr) { }
  mrf_graph_type* mrf_ptr;
  void operator()(base::icontext_type& context);
}; // end of class jtree_update

// Termination management: shared by all workers; atomics accumulate
// progress and the target/deadline fields bound the run.
struct termination_condition {
  bool error;
  float finish_time_seconds;
  size_t target_nsamples;
  size_t target_ntrees;
  graphlab::atomic atomic_nsamples;
  graphlab::atomic atomic_ntrees;
  termination_condition();
  bool finished() const;
  void reset();
};

//! Predeclaration
//! The jt worker executes splashes sequentially within each thread.
// NOTE(review): this class is named jt_builder but declares a
// jt_worker(...) constructor; the .cpp defines jt_worker members --
// one of the two names looks extraction-damaged; confirm upstream.
class jt_builder : public graphlab::iupdate_functor {
public:
  typedef graphlab::iupdate_functor base;
  struct splash_state {
    size_t worker_id;
    splash_settings settings;
    // Tree building data structures
    size_t root_index;
    const std::vector* root_perm_ptr;
    vertex_id_t current_root;
    //! track termination
    termination_condition* terminator_ptr;
    mrf_graph_type* graph_ptr;
    //! Track the collisions with the roots
    size_t ncollisions;
    //! Local junction tree graphlab core
    jtree_gl::core jt_core;
    /**
     * Local jt list used to build on the structure of the
     * jt_core.graph()
     */
    jtree_list jt_list;
    /**
     * Local data structures to reduce thread contention
     */
    std::deque bfs_queue;
    graphlab::mutable_queue priority_queue;
    boost::unordered_set visited;
    factor_t clique_factor;
    factor_t product_of_marginals_factor;
    factor_t conditional_factor;
    factor_t marginal_factor;
  };
  std::set state_set;
  jt_worker(splash_state* state_ptr = NULL);
  void operator+=(const jt_builder& other);
  //! The main loop
  void run();
private:
  //! Construct a single splash
  size_t splash_once();
  //! advance the root
  void advance_root();
  /**
   * Test whether the vertex may be grabbed into the tree owned by
   * this worker (cheap pretest before locking).
   */
  bool is_vertex_available(vertex_id_t vid);
  /**
   * Grab this vertex into the tree owned by worker id
   */
  bool try_grab_vertex(iscope_type& scope);
  /**
   * Release the vertex
   */
  void release_vertex(iscope_type& scope);
  double score_vertex(vertex_id_t vid);
  double score_vertex_l1_diff(vertex_id_t vid);
  double score_vertex_log_odds(vertex_id_t vid);
  double score_vertex_lik(vertex_id_t vid);
  void grow_bfs_jtree();
  void grow_prioritized_jtree();
}; // End of JT worker

/**
 * The jt_splash_sampler implements the junction tree based Gibbs
 * sampler defined in:
 *
 *   Parallel Gibbs Sampling: From Colored Fields to Thin Junction Trees
 *   by Joseph Gonzalez, Yucheng Low, Arthur Gretton, and Carlos Guestrin
 */
class jt_splash_sampler {
public:
  typedef graphlab::general_scope_factory scope_factory_type;
private:
  std::vector workers;
  scope_factory_type scope_factory;
  std::vector< vertex_id_t > root_perm;
  termination_condition terminator;
public:
  jt_splash_sampler(mrf_graph_type& mrf_core,
                    const splash_settings& settings);
  ~jt_splash_sampler();
  /**
   * Get the number of times the splash sampler collided on a root.
   * This minor race event can lead to wasted cpu cycles but does not
   * affect the quality of the samples.
   */
  size_t total_collisions() const;
  /**
   * Get the total number of trees constructed on the last run
   */
  size_t total_trees() const;
  /**
   * Get the total number of single variable updates on the last run.
   */
  size_t total_samples() const;
  /** Run the splash sampler for a fixed number of seconds */
  void sample_seconds(float runtime_secs);
  /** Run the splash sampler for a fixed number of trees */
  void sample_trees(size_t total_trees);
  /**
   * Run the splash sampler for a fixed number of single variable
   * updates
   */
  void sample_updates(size_t total_updates);
private:
  void run();
};

#endif

================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/junction_tree.cpp ================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 */

#include "junction_tree.hpp"
#include
#include

/**
 * Extend the jtree_list data structure by eliminating the vertex.  If
 * the jtree list can be extended then it is extended and this
 * function returns true.
 *
 **/
bool jtree_list::
extend(const mrf_graph_type::vertex_id_type elim_vertex,
       const mrf_graph_type& mrf,
       const size_t max_tree_width,
       const size_t max_factor_size) {
  typedef mrf_graph_type::edge_id_type edge_id_type;
  typedef mrf_graph_type::vertex_id_type vertex_id_type;
  // sanity check: The vertex to eliminate should not have already
  // been eliminated
  ASSERT_FALSE( contains(elim_vertex) );
  /// =====================================================================
  // Construct the elimination clique for the new vertex
  //   1) Fill out clique
  //   2) Track the corresponding factor size and treewidth
  //   3) Find the parent of this clique
  jtree_list::elim_clique clique;
  clique.elim_vertex = elim_vertex;
  // the factor must at least have the eliminated vertex
  size_t factor_size =
    std::max(mrf.vertex_data(elim_vertex).variable.size(), uint32_t(1));
  foreach(const edge_id_type ineid, mrf.in_edge_ids(elim_vertex)) {
    const vertex_id_type vid = mrf.source(ineid);
    const bool is_in_jtree = contains(vid);
    // if the neighbor is in the set of vertices being eliminated
    if(is_in_jtree) {
      clique.vertices += vid;
      factor_size *= std::max(mrf.vertex_data(vid).variable.size(),
                              uint32_t(1) );
    }
    // if the clique ever gets too large then terminate.
    // clique.vertices excludes the elim vertex, so size() >
    // max_tree_width corresponds to a clique of more than
    // max_tree_width + 1 variables.  A max_factor_size of 0 disables
    // the factor-size bound.
    if((clique.vertices.size() > max_tree_width) ||
       (max_factor_size > 0 && factor_size > max_factor_size))
      return false;
  }
  // Determine the parent of this clique: the clique member with the
  // latest elimination time -------------------------
  vertex_id_type parent_id = 0;
  foreach(vertex_id_type vid, clique.vertices)
    parent_id = std::max(parent_id, elim_time_lookup(vid));
  clique.parent = parent_id;
  /// =====================================================================
  // Simulate injecting vertices in parent cliques back to when RIP
  // (the running intersection property) is satisfied.  This pass only
  // checks feasibility; no data structures are modified.
  vertex_set rip_verts = clique.vertices;
  for(vertex_id_t parent_vid = clique.parent;
      !rip_verts.empty() && parent_vid < cliques.size(); ) {
    const jtree_list::elim_clique& parent_clique = cliques[parent_vid];
    // Remove the parent vertex
    rip_verts -= vertex_set(parent_clique.elim_vertex);
    // Construct the new vertices that would normally be stored at
    // this vertex
    const vertex_set tmp_verts = rip_verts + parent_clique.vertices;
    // Check that the expanded clique is still within tree width
    if(tmp_verts.size() > max_tree_width) return false;
    // If we care about the maximum factor size compute the factor
    // size and fail if the factor is too large
    if(max_factor_size > 0) {
      size_t factor_size =
        std::max(mrf.vertex_data(parent_clique.elim_vertex).variable.size(),
                 uint32_t(1));
      foreach(vertex_id_t vid, tmp_verts) {
        factor_size *= std::max(mrf.vertex_data(vid).variable.size(),
                                uint32_t(1));
      }
      if(factor_size > max_factor_size) return false;
    }
    // Find the new parent
    vertex_id_t new_parent_vid = 0;
    foreach(vertex_id_t vid, tmp_verts) {
      new_parent_vid = std::max(new_parent_vid, elim_time_lookup(vid));
    }
    // if the parent changes then we may need to update RIP with
    // tmp_verts otherwise we use rip_verts
    if(new_parent_vid != parent_clique.parent) rip_verts = tmp_verts;
    else rip_verts -= parent_clique.vertices;
    // move up the tree
    parent_vid = new_parent_vid;
  }
  /// =====================================================================
  // If we reached this point RIP can be satisfied safely so proceed
  // to update local data structures
  const size_t new_elim_time = cliques.size();
  cliques.push_back(clique);
  elim_time[clique.elim_vertex] = new_elim_time;
  /// =====================================================================
  // Satisfy RIP: same walk as the simulation above, but this time the
  // parent cliques are actually mutated.
  rip_verts = clique.vertices;
  for(vertex_id_t parent_vid = clique.parent;
      !rip_verts.empty() && parent_vid < cliques.size(); ) {
    // get the parent clique
    jtree_list::elim_clique& parent_clique = cliques[parent_vid];
    // otherwise update that the rip_verts
    rip_verts -= vertex_set(parent_clique.elim_vertex);
    // Construct the new vertices that would normally be stored at
    // this vertex
    const vertex_set tmp_verts = rip_verts + parent_clique.vertices;
    // Determine the new parent (except first vertex)
    vertex_id_t new_parent_vid = 0;
    foreach(vertex_id_t vid, tmp_verts) {
      new_parent_vid = std::max(new_parent_vid, elim_time_lookup(vid));
    }
    //! if the parent changes we must update the rip_verts and the parent value
    if(new_parent_vid != parent_clique.parent) {
      rip_verts = tmp_verts;
      parent_clique.parent = new_parent_vid;
    } else {
      // If the parent is unchanged then we can remove all the
      // variables stored locally from the rip_verts since they all
      // already satisfy RIP.
      rip_verts -= parent_clique.vertices;
    }
    // update the local vertices
    parent_clique.vertices = tmp_verts;
    // Move up tree
    parent_vid = new_parent_vid;
  }
  // Ensure that the parent of the first clique is the null VID
  cliques.front().parent = NULL_VID;
  // Add successfully
  return true;
} // end of extend clique list

/**
 * Convert a jtree_list into a jtree_graph
 */
void jtree_list::
load_graph(const mrf_graph_type& mrf,
           const size_t num_factors,
           jtree_graph_type& jt_graph) const {
//! Todo: Turn this into stack allocated boolean vector
  std::vector assigned_factors(num_factors, false);
  { // Construct the junction tree
    // Ensure that the parent of the root is identifiable
    ASSERT_EQ(cliques.front().parent, NULL_VID);
    foreach(const jtree_list::elim_clique& clique, cliques) {
      const mrf_vertex_data& elim_vertex_vdata =
        mrf.vertex_data(clique.elim_vertex);
      // Create the vertex data
      jtree_vertex_data vdata;
      // set the vertex parent
      vdata.parent = clique.parent;
      // add the eliminated vertex
      vdata.variables = elim_vertex_vdata.variable;
      // add all the other variables in the clique
      foreach(vertex_id_t vid, clique.vertices)
        vdata.variables += mrf.vertex_data(vid).variable;
      // Add the vertex to the junction tree.  Cliques are visited in
      // elimination order so a clique's parent (earlier elim time)
      // already exists in jt_graph.
      vertex_id_t child_id = jt_graph.add_vertex(vdata);
      // get the cliques parent
      vertex_id_t parent_id = clique.parent;
      // Add the edge to parent if not root
      if(parent_id != NULL_VID) {
        // Get the parent vertex data
        const jtree_vertex_data& parent_vdata =
          jt_graph.vertex_data(parent_id);
        jtree_edge_data edata;
        // separator = intersection of child and parent cliques
        edata.variables =
          vdata.variables.intersect(parent_vdata.variables);
        // Add the actual edges (both directions)
        jt_graph.add_edge(child_id, parent_id, edata);
        jt_graph.add_edge(parent_id, child_id, edata);
      }
    } // end of for each
  } // End of construct cliques
  { // Assign factors
    // Very important that these be assigned in reverse order so each
    // factor is attached to the clique with the latest elim time that
    // covers it, and each factor is assigned exactly once.
    size_t jt_vid = jt_graph.num_vertices() - 1;
    rev_foreach(const jtree_list::elim_clique& clique, cliques) {
      ASSERT_LT(jt_vid, jt_graph.num_vertices());
      jtree_vertex_data& jt_vdata = jt_graph.vertex_data(jt_vid--);
      const mrf_vertex_data& mrf_vdata =
        mrf.vertex_data(clique.elim_vertex);
      foreach(factor_id_t fid, mrf_vdata.factor_ids) {
        if(!assigned_factors[fid]) {
          jt_vdata.factor_ids.push_back(fid);
          assigned_factors[fid] = true;
        }
      }
    }
  }
} // end of build junction tree

// Lightweight adjacency-list representation used only by validate()
// and test_rip() below.
struct jtgraph_node {
  vertex_id_t vid;
  std::set vars;
  std::set neighbors;
};
typedef std::vector jtgraph_type;

std::ostream& operator<<(std::ostream& out, const std::set& set) {
  // Delegates to the graphlab-provided stream operator.
  using namespace graphlab;
  return out << set;
  // out << "{";
  // size_t i = 0;
  // foreach(const vertex_id_t vid, set) {
  //   out << vid;
  //   if(i + 1 < set.size()) out << ", ";
  //   i++;
  // }
  // out << "}";
  // return out;
}

// Verify the running intersection property on a jtgraph by a
// two-phase message passing that computes, per directed edge, the set
// of variables reachable through that edge.
void test_rip(const jtgraph_type& graph) {
  namespace gl = graphlab;
  std::map > > edgedata;
  std::vector< std::set > reachable(graph.size());
  foreach(const jtgraph_node& node, graph) {
    reachable[node.vid] = node.vars;
    // initialize the out edges;
    foreach(vertex_id_t nvid, node.neighbors) {
      ASSERT_NE(nvid, node.vid);
      edgedata[node.vid][nvid] = node.vars;
    }
  }
  // Backward sweep
  rev_foreach(const jtgraph_node& node, graph) {
    // Receive in reachable
    foreach(vertex_id_t nvid, node.neighbors) {
      reachable[node.vid].insert(edgedata[nvid][node.vid].begin(),
                                 edgedata[nvid][node.vid].end());
    }
    // write out reachable
    foreach(vertex_id_t nvid, node.neighbors) {
      std::set tmpset =
        gl::set_difference(reachable[node.vid], edgedata[nvid][node.vid]);
      edgedata[node.vid][nvid].insert(tmpset.begin(), tmpset.end());
    }
  }
  // Forward sweep
  foreach(const jtgraph_node& node, graph) {
    // Receive in reachable
    foreach(vertex_id_t nvid, node.neighbors) {
      reachable[node.vid].insert(edgedata[nvid][node.vid].begin(),
                                 edgedata[nvid][node.vid].end());
    }
    // write out reachable
    foreach(vertex_id_t nvid, node.neighbors) {
      std::set tmpset =
        gl::set_difference(reachable[node.vid], edgedata[nvid][node.vid]);
      edgedata[node.vid][nvid].insert(tmpset.begin(), tmpset.end());
    }
  }
  // Check the running intersection property: variables reachable
  // through two different neighbors must be contained in this clique.
  foreach(const jtgraph_node& node, graph) {
    // std::cout << node.vid << ": " << node.vars << std::endl
    //           << "\t" << node.neighbors << std::endl;
    std::set local_sep_set;
    foreach(const vertex_id_t n1, node.neighbors) {
      // std::cout << "\t" << n1 << "--" << edgedata[n1][node.vid] << std::endl;
      foreach(const vertex_id_t n2, node.neighbors) {
        if(n1 != n2) {
          local_sep_set =
            gl::set_union(local_sep_set,
                          gl::set_intersect(edgedata[n1][node.vid],
                                            edgedata[n2][node.vid]));
        }
      }
    }
    // std::cout << "\t" << local_sep_set << std::endl;
    ASSERT_TRUE(gl::is_subset(local_sep_set, node.vars));
  }
  // getchar();
}

/**
 * Scan the junction tree list to ensure that all invariants hold.
 */
void jtree_list::
validate() const {
  jtgraph_type jtgraph(cliques.size());
  // validate the junction tree list data structure
  for(size_t i = 0; i < cliques.size(); ++i) {
    const elim_clique& clique = cliques[i];
    ASSERT_EQ(i, elim_time_lookup(clique.elim_vertex));
    ASSERT_FALSE(clique.vertices.contains(clique.elim_vertex));
    const bool is_root = (clique.parent == NULL_VID);
    if(is_root) {
      ASSERT_EQ(i, 0);
    } else {
      ASSERT_GT(i, 0);
      // ensure that the parent is eliminated later
      ASSERT_LT(clique.parent, jtgraph.size());
      const elim_clique& parent_clique = cliques[clique.parent];
      ASSERT_TRUE(clique.vertices.contains(parent_clique.elim_vertex));
      ASSERT_LE(clique.vertices,
                parent_clique.vertices + parent_clique.elim_vertex);
    }
    // populate the jtgraph node
    jtgraph_node& node(jtgraph[i]);
    node.vid = i;
    node.vars.insert(clique.elim_vertex);
    node.vars.insert(clique.vertices.begin(), clique.vertices.end());
    if(!is_root) {
      node.neighbors.insert(clique.parent);
      jtgraph[clique.parent].neighbors.insert(i);
    }
  }
  // test running intersection property.
  test_rip(jtgraph);
}

#include

================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/junction_tree.hpp ================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
See the License for the specific language
 * governing permissions and limitations under the License.
 *
 */

#ifndef PGIBBS_JUNCTION_TREE_HPP
#define PGIBBS_JUNCTION_TREE_HPP

/**
 *
 * Represents a junction tree
 *
 * \author Joseph Gonzalez
 */

// INCLUDES ===================================================================>
// NOTE(review): the targets of the bare #include directives and many
// template argument lists below appear stripped by text extraction.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "factorized_model.hpp"
#include "mrf.hpp"

struct jtree_vertex_data;
struct jtree_edge_data;

typedef graphlab::graph< jtree_vertex_data, jtree_edge_data>
  jtree_graph_type;

// Per-clique state of the junction tree used during calibration and
// sampling.
struct jtree_vertex_data {
  jtree_graph_type::vertex_id_type parent;  // parent clique (NULL_VID at root)
  domain_t variables;                       // variables in this clique
  bool calibrated;                          // all inbound messages received
  bool sampled;                             // assignment drawn for this clique
  std::vector factor_ids;                   // model factors assigned here
  factor_t factor;
  assignment_t asg;
  jtree_vertex_data() :
    parent(-1), calibrated(false), sampled(false) { }
}; // End of vertex data

// Per-edge state: the separator variables and the message passed
// between adjacent cliques.
struct jtree_edge_data {
  domain_t variables;   // separator (intersection of endpoint cliques)
  factor_t message;
  bool calibrated;
  bool received;
  jtree_edge_data() :
    calibrated(false), received(false) { }
}; // End of edge data

//// Junction tree construction code
//// =====================================================================>
//! The fast set used in junction tree construction
typedef graphlab::small_set<2*MAX_DIM, jtree_graph_type::vertex_id_type>
  vertex_set;

// Incrementally-built elimination ordering / clique list from which a
// jtree_graph is constructed.
struct jtree_list {
  struct elim_clique {
    //! The parent of this elim clique in the jtree_list
    jtree_graph_type::vertex_id_type parent;
    //! The vertex eliminated when this clique was created
    mrf_graph_type::vertex_id_type elim_vertex;
    //! The vertices created in this clique EXCLUDING elim_vertex
    vertex_set vertices;
    elim_clique() : parent(-1) { }
  };
  typedef std::vector clique_list_type;
  typedef boost::unordered_map elim_time_type;
  //! The collection of cliques
  clique_list_type cliques;
  //! the time variable i was eliminated
  elim_time_type elim_time;
  //! True if vid has already been eliminated (is in the tree)
  inline bool contains(const mrf_graph_type::vertex_id_type vid) const {
    return elim_time.find(vid) != elim_time.end();
  }
  //! Elimination time of vid; asserts that vid is in the tree
  inline mrf_graph_type::vertex_id_type
  elim_time_lookup(const mrf_graph_type::vertex_id_type vid) const {
    elim_time_type::const_iterator iter(elim_time.find(vid));
    ASSERT_TRUE(iter != elim_time.end());
    return iter->second;
  }
  inline void clear() {
    cliques.clear();
    elim_time.clear();
  }
  /**
   * Extend the jtree_list data structure by eliminating the vertex.
   * If the jtree list can be extended then it is extended and this
   * function returns true.
   *
   **/
  bool extend(const mrf_graph_type::vertex_id_type elim_vertex,
              const mrf_graph_type& mrf,
              const size_t max_tree_width,
              const size_t max_factor_size);
  /**
   * Convert a jtree_list into a jtree_graph
   */
  void load_graph(const mrf_graph_type& mrf,
                  const size_t num_factors,
                  jtree_graph_type& jt_graph) const;
  /**
   * Check internal data structures
   */
  void validate() const;
};

// /**
//  * Extend the jtree_list data structure by eliminating the vertex. If
//  * the jtree list can be extended then it is extended and this
//  * function returns true.
//  *
//  **/
// bool extend_jtree_list(const vertex_id_t elim_vertex,
//                        const mrf_graph_type& mrf,
//                        const size_t max_tree_width,
//                        const size_t max_factor_size,
//                        jtree_list& jt_list);
// /**
//  * Convert a jtree_list into a jtree_graph
//  */
// void jtree_list_to_jtree_graph(const jtree_list& jt_list,
//                                const mrf_graph_type& mrf,
//                                const size_t num_factors,
//                                jtree_graph_type& jt_graph);
// /**
//  * Scan the junction tree list to ensure that all invariants hold.
//  */
// bool validate_jtree_list(const jtree_list& jt_list);

#endif

================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/make_denoise_alchemy.cpp ================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.  See the License for the specific language
 * governing permissions and limitations under the License.
 *
 */

/**
 * Run parallel junction tree gibbs sampling on a factorized model
 */
#include
#include
#include

#include "image.hpp"
#include "factorized_model.hpp"

#include

/** Construct denoising ising model based on the image */
// NOTE(review): this function body is empty; all model construction
// actually happens inline in main() below.  Either dead code or an
// unfinished refactor -- confirm before removing.
void construct_denoise_graph(image& img, size_t num_rings, double sigma,
                             const std::string& corruption,
                             factor_t edge_factor,
                             factorized_model& model) {
} // End of construct graph

// Build a synthetic image denoising MRF (node potentials from a
// corrupted image plus pairwise grid smoothing factors) and save it
// in alchemy format.
int main(int argc, char** argv) {
  std::cout << "make the image denoising alchemy problem" << std::endl;
  // Defaults for all command line options
  std::string model_filename = "image";
  std::string drawing = "sunset";
  std::string corruption = "gaussian";
  std::string smoothing = "square";
  double lambda = 3;
  double sigma = 1;
  size_t rows = 200;
  size_t rings = 7;
  // Command line parsing
  graphlab::command_line_options clopts("Make the alchemy image", true);
  clopts.attach_option("model", &model_filename, model_filename,
                       "Alchemy formatted model file");
  clopts.attach_option("drawing", &drawing, drawing,
                       "drawing type");
  clopts.attach_option("corruption", &corruption, corruption,
                       "corruption type");
  clopts.attach_option("smoothing", &smoothing, smoothing,
                       "smoothing type");
  clopts.attach_option("lambda", &lambda, lambda,
                       "edge parameter");
  clopts.attach_option("sigma", &sigma, sigma,
                       "noise parameter");
  clopts.attach_option("rows", &rows, rows,
                       "number of rows and cols");
  clopts.attach_option("rings", &rings, rings,
                       "number of rings");
  if( !clopts.parse(argc, argv) ) {
    std::cout << "Error parsing command line arguments!" << std::endl;
    return EXIT_FAILURE;
  }
  std::cout << "Creating a synethic image." << std::endl;
  image original(rows, rows);
  if(drawing == "sunset")
    original.paint_sunset(rings);
  else if(drawing == "checkerboard")
    original.paint_checkerboard(rings);
  else {
    std::cout << "Invalid drawing type!" << std::endl;
    exit(1);
  }
  std::cout << "Saving original image. " << std::endl;
  original.save("original.pgm");
  std::cout << "Corrupting Image. " << std::endl;
  image noisy = original;
  if(corruption == "gaussian")
    noisy.gaussian_corrupt(sigma);
  else if(corruption == "flip")
    noisy.flip_corrupt(rings, 0.75);
  else if(corruption == "ising")
    noisy = image(rows, rows);
  else {
    std::cout << "Invalid corruption type!" << std::endl;
    exit(1);
  }
  std::cout << "Saving corrupted image. " << std::endl;
  noisy.save("corrupted.pgm");
  // dummy variables 0 and 1 and num_rings by num_rings
  std::cout << "Creating edge factor" << std::endl;
  factor_t edge_factor(domain_t(variable_t(0, rings),
                                variable_t(1, rings)));
  // Set the smoothing type
  if(smoothing == "square") {
    edge_factor.set_as_agreement(lambda);
  } else if (smoothing == "laplace") {
    edge_factor.set_as_laplace(lambda);
  } else {
    std::cout << "Invalid smoothing stype!" << std::endl;
    assert(false);
  }
  std::cout << edge_factor << std::endl;
  std::cout << "Constructing factor graph." << std::endl;
  factorized_model model;
  // Add all the node factors
  double sigmaSq = sigma*sigma;
  for(size_t i = 0; i < noisy.rows(); ++i) {
    for(size_t j = 0; j < noisy.cols(); ++j) {
      // initialize the potential and belief
      uint32_t pixel_id = noisy.vertid(i, j);
      variable_t var(pixel_id, rings);
      factor_t factor(var);
      // Set the node potential
      double obs = noisy.pixel(i, j);
      if(corruption == "gaussian") {
        // Gaussian log-likelihood of each ring value
        for(size_t pred = 0; pred < rings; ++pred) {
          factor.logP(pred) =
            -(obs - pred)*(obs - pred) / (2.0 * sigmaSq);
        }
      } else if(corruption == "flip") {
        for(size_t pred = 0; pred < rings; ++pred) {
          factor.logP(pred) = obs == pred? 0 : -sigma;
        }
      } else if(corruption == "ising") {
        // Do nothing since we want a uniform node potential
        factor.uniform();
      } else {
        std::cout << "Invalid corruption!" << std::endl;
        exit(1);
      }
      factor.normalize();
      model.add_factor(factor);
    } // end of for j in cols
  } // end of for i in rows
  // Construct edge_factors: one smoothing factor per grid edge
  // (right and down neighbors only, to avoid duplicates)
  for(size_t i = 0; i < noisy.rows(); ++i) {
    for(size_t j = 0; j < noisy.cols(); ++j) {
      size_t source = noisy.vertid(i,j);
      variable_t source_var(source, rings);
      if(i+1 < noisy.rows()) {
        vertex_id_t target = noisy.vertid(i+1, j);
        variable_t target_var(target, rings);
        domain_t dom(source_var, target_var);
        edge_factor.set_args(dom);
        model.add_factor(edge_factor);
      }
      if(j+1 < noisy.cols()) {
        vertex_id_t target = noisy.vertid(i, j+1);
        variable_t target_var(target, rings);
        domain_t dom(source_var, target_var);
        edge_factor.set_args(dom);
        model.add_factor(edge_factor);
      }
    } // end of for j in cols
  } // end of for i in rows
  std::cout << "Saving model in alchemy format" << std::endl;
  model.save_alchemy(model_filename + ".alchemy");
  return EXIT_SUCCESS;
} // end of main

#include

================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/matlab/Makefile ================================================
# Build the MEX entry point against the pgibbs and graphlab PIC libraries.
gibbs_sampler_impl: gibbs_sampler_impl.cpp matwrap.hpp
	mex -largeArrayDims \
	CXXFLAGS="-g -fPIC -Wall -O3 -pthread -fexceptions -fno-omit-frame-pointer -fopenmp" \
	gibbs_sampler_impl.cpp \
	-I../../../src \
	-L../../../release/src/graphlab \
	-L../../../release/src/graphlab/extern/metis/GKlib \
	-L../../../release/src/graphlab/extern/metis/libmetis \
	-L../../../release/demoapps/pgibbs \
	-lpgibbs_pic \
	-lgraphlab_pic \
	-lgomp
================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/matlab/asg2ind.m ================================================
% Convert a 1-based multidimensional assignment into a linear index
% (column-major, like MATLAB sub2ind for a vector of subscripts).
function ndx = asg2ind(siz, asg)
multiple = [1, cumprod(siz(1:end-1))];
assert(isempty(find(asg > siz, 1)));
ndx = sum(multiple .* (asg - 1))
+ 1; end ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/matlab/compile_gibbs_sampler.m ================================================ %% Compile all mex files % This script compiles the mex files needed to run the parallel sampling % algorithms. graphlab_path='../../..'; graphlab_bin_path=[graphlab_path, '/release']; pgibbs_bin_path=[graphlab_bin_path, '/demoapps/pgibbs']; graphlab_include_path = [graphlab_path, '/src']; graphlab_link_path = [graphlab_bin_path, '/src/graphlab']; cxx_flags = ['CXXFLAGS=', ... '"-fPIC -Wall -O3 -pthread -fexceptions -fno-omit-frame-pointer ', ... '-fopenmp"']; %% If release folder does not exist run configure if(~exist(graphlab_bin_path, 'dir')) disp('Configure was not yet run running config now.'); [errorstatus, result] = ... system(['cd ', graphlab_path, ';', ' ./configure']) if(errorstatus) error('Error running config!'); end end %% Compile the pgibbs library needed for the mex file [errorstatus, result] = ... system(['cd ', pgibbs_bin_path, ';', ' make -j2']) if(errorstatus) error('Error compiling pgibbs!'); end %% Do the compilation compiler_type_flags = ''; if(ismac()) disp('We require gcc 4.2 on mac'); compiler_type_flags = 'LD=gcc-4.2 CC=gcc-4.2 CXX=g++-4.2'; end compilestr = ... ['mex ', compiler_type_flags, ' ', ... '-largeArrayDims', ' ', ... cxx_flags, ' ', ... 'gibbs_sampler_impl.cpp', ' ', ... '-I', graphlab_include_path, ' ', ... '-L', graphlab_link_path, ' ', ... '-L', graphlab_link_path, '/extern/metis/GKlib', ' ', ... '-L', graphlab_link_path, '/extern/metis/libmetis', ' ', ... '-L', pgibbs_bin_path, ' ', ... '-lpgibbs_pic', ' ', ... '-lgraphlab_pic ', ' ', ... 
'-lgomp']; disp(compilestr); eval(compilestr); disp('Finished!'); ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/matlab/gibbs_sampler.m ================================================ %% Parallel Gibbs sampler % The parallel gibbs sampler is an optimized a c++ implementation of % the discrete Gibbs samplers which uses multiple threads to % accelerate the generation of a single sampling chain. The parallel % Gibbs sampler implements two algorithms described in the paper: % % Parallel Gibbs Sampling: From Colored Fields to Think Junction Trees % by Joseph Gonzalez, Yucheng Low, Arthur Gretton, and Carlos Guestrin % % The first algorithm is the Chromatic sampler which is a direct % parallelization of the classic Gibbs sampler. The second algorithm % is the Splash Gibbs sampler which incrementally builds thin junction % trees. % % To use this function you must first construct a discrete factor % graph which is simply a cell array of table factors: % % factor{1} = table_factor( [1,2], log(rand(3,4)) ); % factor{2} = table_factor( [2,3], log(rand(4,2)) ); % % This creates a factorized model (with random tables) over the % variables 1, 2, and 3. We can then run the CHROMATIC sampler by % calling: % % options.alg_type = 'CHROMATIC'; % options.nsamples = 100; % options.nskip = 10; % [samples, nupdates, nchanges, marginals] = ... % gibbs_sampler(factors, options); % % % % Arguments: % factors: a cell array of factors constructed using the table_factor % function. % options: a struct with the following fields: % * alg_type: [Default: 'CHROMATIC'] A string either 'CHROMATIC' or % 'SPLASH'. For relatively fast mixing models the 'CHORMATIC' % algorithm is simpler and faster. For slowly mixing models % use the 'SPLASH' algorithm. In this case additional options % will need to be set. % * nsamples: [Default: 10] The number of joint samples to collect. 
% * nskip: [Default: 10] The number of samples to skip between % joint samples. Because of the asynchronous nature of the % algorithms more or than nskip samples may actually be skipped % in practice. In the 'SPLASH' algorithm nskip * nvariables % single variable updates are computed before the next joint % sample is constructed. % * ncpus: [Default: 2] The number of threads to use when running % the inference algorithm. The number of cpus should be less % than the number of variables and ideally not much larger than % the number of processors. % * treewidth: [Default: 3] The treewidth of the junction trees % constructed using the Splash sampler. % * treeheight: [Default: maxint] The largest height of a tree % * treesize: [Default: maxint] The largest possible size of a % tree % * priorities: [Default: false] Use priorities when % constructing the splash trees % * checkargs: [Default: True] Determines if the arguments are % checked before calling the C++ code. While we do additional % argument checking withing the C++ code it is often easier to % debug broken factors from within the matlab code. However % for the fastest performance disable checkargs (set to false). % % Return Arguments: % samples: nvars * nsamples matrix of joint assingments % nupdates: nvars * nsamples number of times each variable was updated. % nchanges: nvars * nsamples the number of times the variable's % assignment changed values % beliefs: nvars * nsamples cell array of vectors represent the % Rao-Blackwellized marginal estimates for each variable. % % See Also: table_factor % % This actual c++ mex function is provided in gibbs_sampler_impl.cpp % which can be compiled by running compile_gibbs_sampler.m. % function [samples, nupdates, nchanges, marginals] = ... 
gibbs_sampler(factors, options) %% Check the arguments if(~iscell(factors)) error('The factors argument must be a cell array of table_factors'); end % Define default options if(~exist('options', 'var')) options.alg_type = 'CHROMATIC'; end if(~isfield(options, 'alg_type')) options.alg_type = 'CHROMATIC'; end if(~isfield(options, 'nsamples')) options.nsamples = 10; end if(~isfield(options, 'nskip')) options.nskip = 10; end if(~isfield(options, 'ncpus')) options.ncpus = 2; end if(~isfield(options, 'treewidth')) options.treewidth = 3; end if(~isfield(options, 'treeheight')) options.treeheight = double(intmax()); end if(~isfield(options, 'treesize')) options.treesize = double(intmax()); end if(~isfield(options, 'priorities')) options.priorities = false; end if(~isfield(options, 'vanish')) options.vanish = 10; end if(~isfield(options, 'checkargs')) options.checkargs = true; end if(~isfield(options, 'save_alchemy')) options.save_alchemy = false; end options.treewidth = double(options.treewidth); if(options.treewidth > 32) error('Treewidth must be less than 32'); end if(options.treewidth < 1) error('Treewidth must be at least 1'); end if(options.checkargs) max_var = 0; %% Check the factors data structure for i = 1:length(factors) if(~isfield(factors{i}, 'vars')) disp(factors{i}); error('Factor %d does not contain the field vars', i); end if(~strcmp(class(factors{i}.vars), 'uint32')) disp(factors{i}); error(['Factor ', num2str(i), ... ' has variables of type ', ... class(factors{i}.vars), ... ' when they should be of type uint32.']); end if(~isfield(factors{i}, 'logP')) disp(factors{i}); error('Factor %d does not contain the field logP', i); end if(~strcmp(class(factors{i}.logP), 'double')) disp(factors{i}); error(['Factor ', num2str(i), ... ' has logP of type ', ... class(factors{i}.logP), ... 
' when they should be of type double.']); end % Get the maximum variables max_var = max(max(factors{i}.vars(:)), max_var); if(min(factors{i}.vars) <= 0) disp(factors{i}); error('Factor %d has 0 valued variables', i); end end %% check all the variables have consistent sizes; vars = 1:max_var; var_sizes = zeros(max_var, 1); for i = 1:length(factors) current_sizes = var_sizes(factors{i}.vars); dims = size(factors{i}.logP)'; dims = dims(dims > 1); if(length(dims) ~= length(current_sizes)) error(['The number of variables %d in factor %d does not match ' ... 'the number of dimensions %d.'], ... length(current_sizes), i, length(dims)); end ind = current_sizes > 0; % all elements in ind have been set and should just match if( ~isempty(find(current_sizes(ind) ~= dims(ind), 1)) ) errorind = find(current_sizes(ind) ~= dims(ind), 1); error(['Variable %d has already been seen having size %d ', ... 'but was just now observed to have size %d in factor %d.'], ... factors{i}.variables(ind(errorind)), ... current_sizes(ind(errorind)), ... dims(ind(errorind)), ... i); end var_sizes(factors{i}.vars(~ind)) = dims(~ind); end unset_vars = find(var_sizes(:) == 0); if(~isempty(unset_vars)) error(['The following variables were not set correctly: ', ... mat2str(unset_vars)]); end end %% Call the sampler if(nargout() <= 1) samples = gibbs_sampler_impl(factors, options); elseif(nargout() == 2) [samples, nupdates] = gibbs_sampler_impl(factors, options); elseif(nargout() == 3) [samples, nupdates, nchanges] = ... gibbs_sampler_impl(factors, options); elseif(nargout() == 4) [samples, nupdates, nchanges, marginals] = ... gibbs_sampler_impl(factors, options); end end ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/matlab/gibbs_sampler_impl.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * */ /** * This file contains the code used to run the chromatic gibbs sampler * from matlab */ #include "mex.h" #include #include #include "../factorized_model.hpp" #include "../mrf.hpp" #include "../global_variables.hpp" #include "../junction_tree.hpp" #include "../chromatic_sampler.hpp" #include "../jt_splash_sampler.hpp" #include "matwrap.hpp" using namespace std; /////////////////////////////////////////////////////////////////////////// // Struct fieldnames const char* vars_field_name = "vars"; const char* logP_field_name = "logP"; int vars_field_id = -1; int logP_field_id = -1; /////////////////////////////////////////////////////////////////////////// struct options { enum { CHROMATIC, SPLASH } alg_type; size_t nsamples; // double tskip; size_t nskip; size_t ncpus; // size_t ntrees; size_t treewidth; size_t treeheight; size_t treesize; bool priorities; size_t vanish; bool save_alchemy; size_t ncpus_per_splash; options(matwrap args = matwrap(NULL)) : alg_type(CHROMATIC), nsamples(10), nskip(10), // tskip(5), ncpus(2), // ntrees(ncpus), treewidth(3), treeheight(std::numeric_limits::max()), treesize(std::numeric_limits::max()), priorities(false), vanish(10), save_alchemy(false), ncpus_per_splash(1) { if(args.is_null()) return; safe_assert(args.is_struct(), "Additional arguments must be in a struct"); { // parse the sampler algorithm type matwrap 
arg(args.get_field("alg_type")); if(!arg.is_null()) { const size_t str_len(255); char sampler_type_c_str[str_len]; arg.as_string(sampler_type_c_str, str_len); std::string sampler_type_str(sampler_type_c_str, arg.size()); if(sampler_type_str == "CHROMATIC") alg_type = CHROMATIC; else if(sampler_type_str == "SPLASH") alg_type = SPLASH; else { char error_str[2*str_len]; std::sprintf(error_str, "Invalid sampler type: (%s)\n", sampler_type_c_str); mexErrMsgTxt(error_str); } } } // end of parse field name { // parse the number of samples matwrap arg(args.get_field("nsamples")); if(!arg.is_null()) { nsamples = size_t(arg.get_double_array()[0]); } } // end of parse field name { // parse the number of skipped samples matwrap arg(args.get_field("nskip")); if(!arg.is_null()) { nskip = size_t(arg.get_double_array()[0]); } } // end of parse field name // { // parse the number of skipped samples // matwrap arg(args.get_field("tskip")); // if(!arg.is_null()) { // tskip = arg.get_double_array()[0]; // } // } // end of parse field name { // parse the number of cpus matwrap arg(args.get_field("ncpus")); if(!arg.is_null()) { ncpus = size_t(arg.get_double_array()[0]); } } // end of parse field name // { // parse the number of trees // matwrap arg(args.get_field("ntrees")); // if(!arg.is_null()) { // ntrees = size_t(arg.get_double_array()[0]); // } // } // end of parse field name { // parse the treewidth matwrap arg(args.get_field("treewidth")); if(!arg.is_null()) { treewidth = size_t(arg.get_double_array()[0]); } } // end of parse field name { // parse the treeheight matwrap arg(args.get_field("treeheight")); if(!arg.is_null()) { treeheight = size_t(arg.get_double_array()[0]); } ASSERT_GT(treeheight, 0); } // end of parse field name { // parse the treesize matwrap arg(args.get_field("treesize")); if(!arg.is_null()) { treesize = size_t(arg.get_double_array()[0]); } ASSERT_GT(treesize, 0); } // end of parse field name { // parse the priorities matwrap arg(args.get_field("priorities")); 
if(!arg.is_null()) { priorities = bool(arg.get_double_array()[0]); } } // end of parse field name { // parse the vanish matwrap arg(args.get_field("vanish")); if(!arg.is_null()) { vanish = size_t(arg.get_double_array()[0]); } } // end of parse field name { // parse the ncpus_per_splash matwrap arg(args.get_field("npcus_per_splash")); if(!arg.is_null()) { ncpus_per_splash = size_t(arg.get_double_array()[0]); } } // end of parse field name { // parse the ncpus_per_splash matwrap arg(args.get_field("save_alchemy")); if(!arg.is_null()) { save_alchemy = bool(arg.get_double_array()[0]); } } // end of parse field name } // end of constructor void print() { std::cout << "Generating " << nsamples << " samples " << std::endl; std::cout << "Skipping every " << nskip << " samples." << std::endl; switch(alg_type) { case CHROMATIC: mexPrintf("Using the chromatic sampler.\n"); break; case SPLASH: mexPrintf("Using the splash sampler.\n"); std::cout << "ncpus: " << ncpus << std::endl // << "ntrees: " << ntrees << std::endl << "treewidth: " << treewidth << std::endl << "treeheight: " << treeheight << std::endl << "prioritizeds: " << (priorities ? 
"enabled" : "disabled") << std::endl << "ncpus/splash: " << ncpus_per_splash << std::endl; break; default: mexErrMsgTxt("No algorithm selected!\n"); break; } flush_screen(); } }; // end of options struct /////////////////////////////////////////////////////////////////////////// // Factor Graph generation code /** * Build a graphlab table factor from the matlab table factor */ void add_factor(factorized_model& model, matwrap matlab_factor) { if(matlab_factor.is_null()) { mexErrMsgTxt("Null factor argument to build factor!\n"); } if(!matlab_factor.is_struct()) { mexErrMsgTxt("Invalid factor type"); } // Get the field numbers if(vars_field_id < 0) { vars_field_id = matlab_factor.get_field_number(vars_field_name); } if(logP_field_id < 0) { logP_field_id = matlab_factor.get_field_number(logP_field_name); } safe_assert(vars_field_id >= 0, "No field named vars in factor struct!"); safe_assert(logP_field_id >= 0, "No field named logP in factor struct!"); // Load the members from the factor matwrap variables = matlab_factor.get_field(vars_field_id); matwrap logP = matlab_factor.get_field(logP_field_id); // Check the fields safe_assert(variables.is_uint32(), "Variables are not uint32t"); safe_assert(logP.is_double(), "logP must be of type double"); // Get information about each field const mwSize num_vars = variables.size(); const mwSize* var_dims = logP.get_dimensions(); const mwSize num_dims = logP.get_num_dimensions(); safe_assert(num_vars <= MAX_DIM, "Too many (>32) variables in factor."); // Build up the domain domain_t dom; const uint32_t* varids = variables.get_data(); for(size_t i = 0, j = 0; i < num_vars && j < num_dims; ++i, ++j) { // Skip all the empty dimensions while(var_dims[j] <= 1 && j < num_dims) j++; safe_assert(j < num_dims, "Factor dimensions do not match variables"); safe_assert(varids[i] > 0, "Variabile ids must start at 1"); variable_t var(varids[i]-1, var_dims[j]); dom += var; } // Create the factor factor_t& factor(model.add_factor(dom)); const 
size_t num_logP(logP.size()); const double* data(logP.get_double_array()); safe_assert(num_logP == dom.size(), "Insufficient factor data to match variables"); for(size_t i = 0; i < num_logP; ++i) { factor.logP(i) = data[i]; } factor.normalize(); } void build_factorized_model(factorized_model& model, matwrap factors) { const size_t num_factors(factors.size()); model.reserve(num_factors); // Load all the factors for(size_t i = 0; i < num_factors; ++i) { add_factor(model, factors.get_cell(i)); } } /////////////////////////////////////////////////////////////////////////// // Code for sample collection struct result_collector { size_t sample_id; matwrap samples; matwrap beliefs; matwrap nsamples; matwrap nchanges; result_collector(matwrap samples = NULL, matwrap beliefs = NULL, matwrap nsamples = NULL, matwrap nchanges = NULL) : sample_id(0), samples(samples), beliefs(beliefs), nsamples(nsamples), nchanges(nchanges) { } }; graphlab::glshared glshared_collector; void collector_sync(mrf_gl::iscope& scope, graphlab::any& accumulator) { result_collector collector = glshared_collector.get_val(); const size_t& sample_id = collector.sample_id; const mrf_vertex_data& vdata(scope.const_vertex_data()); if(!collector.samples.is_null() && sample_id < collector.samples.cols()) { collector.samples.mat_index2d(vdata.variable.id(), sample_id) = vdata.asg + 1; } if(!collector.beliefs.is_null() && sample_id < collector.beliefs.cols()) { matwrap blf(collector.beliefs. 
get_cell_index2d(vdata.variable.id(), sample_id)); double sum = 0; for(size_t i = 0; i < vdata.variable.size(); ++i) { sum += (blf.get_double_array()[i] = exp(vdata.belief.logP(i))); } for(size_t i = 0; i < vdata.variable.size(); ++i) { blf.get_double_array()[i] /= sum; } } if(!collector.nsamples.is_null() && sample_id < collector.nsamples.cols()) { collector.nsamples.mat_index2d(vdata.variable.id(), sample_id) = vdata.nsamples; } if(!collector.nchanges.is_null() && sample_id < collector.nchanges.cols()) { collector.nchanges.mat_index2d(vdata.variable.id(), sample_id) = vdata.nchanges; } } void collector_apply(graphlab::any& current_data, const graphlab::any& param) { result_collector& collector = current_data.as(); collector.sample_id++; } void collector_merge(graphlab::any& merge_dest, const graphlab::any& merge_src) { // nop } size_t global_termination_nsamples = 0; bool nsamples_terminator() { return glshared_collector.get_val().sample_id >= global_termination_nsamples; } void run_chromatic_sampler(mrf_gl::core& core, const options& opts) { core.sched_options().add_option("update_function", single_gibbs_update); // core.sched_options().add_option("max_iterations", // opts.nsamples * opts.nskip); global_termination_nsamples = opts.nsamples; core.engine().add_terminator(nsamples_terminator); std::cout << "Running." << std::endl; flush_screen(); double runtime = core.start(); std::cout << "Runtime: " << runtime << std::endl; flush_screen(); } // end of run_chromatic_sampler void run_jt_splash_sampler(mrf_gl::core& core, const options& opts) { std::cout << "Starting Splash Sampler." 
<< std::endl; flush_screen(); splash_settings settings; settings.ntrees = opts.ncpus; settings.max_tree_width = opts.treewidth; settings.max_tree_height = opts.treeheight; settings.max_tree_size = opts.treesize; settings.priorities = opts.priorities; settings.subthreads = opts.ncpus_per_splash; jt_splash_sampler jtsplash_sampler(core.graph(), settings); const size_t samples_per_iteration = core.graph().num_vertices() * opts.nskip; for(size_t i = 0; i < opts.nsamples; ++i) { // std::cout << "Running an iteration" << std::endl; flush_screen(); // run the splash sampler jtsplash_sampler.sample_updates(samples_per_iteration); // jtsplash_sampler.sample_seconds(opts.tskip); // compute the sync core.sync_now(glshared_collector); // std::cout << "Ntrees: " << jtsplash_sampler.total_trees() // << std::endl // << "Nsamples: " << jtsplash_sampler.total_samples() // << std::endl // << "NCollisions: " << jtsplash_sampler.total_collisions() // << std::endl; } } // end of run splash sampler /** * See parallel_gibbs.m for identical arguments * * [samples, nupdates, nchanges, marginals] = ... * gibbs_sampler(factors, options); * */ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { safe_assert(nrhs > 0, "Invalid number of arguments. 
See documentation for details"); { // Seed the random number generator using rand from within matlab mxArray* plhs[1]; mxArray* prhs[4]; prhs[0] = mxCreateNumericMatrix(1,1, mxUINT32_CLASS, mxREAL); prhs[1] = mxCreateNumericMatrix(1,1, mxUINT32_CLASS, mxREAL); prhs[2] = mxCreateNumericMatrix(1,1, mxUINT32_CLASS, mxREAL); matwrap(prhs[0]).get_data()[0] = std::numeric_limits::max(); matwrap(prhs[1]).get_data()[0] = 1; matwrap(prhs[2]).get_data()[0] = 2; prhs[3] = mxCreateString("uint32"); mexCallMATLAB(1, plhs, 4, prhs, "randi"); const size_t seed_value = matwrap(plhs[0]).get_data()[0]; // std::cout << "Seed value: " << seed_value << std::endl; mxDestroyArray(plhs[0]); mxDestroyArray(prhs[0]); mxDestroyArray(prhs[1]); mxDestroyArray(prhs[2]); mxDestroyArray(prhs[3]); graphlab::random::seed(seed_value); } // Get first argument the cell array of factors const matwrap matlab_factors(const_cast(prhs[0])); if(matlab_factors.is_null()) { mexErrMsgTxt("No factors provided!\n"); } if(!matlab_factors.is_cell()) { mexErrMsgTxt("Factors must be in cell array form!\n"); } options opts; if(nrhs > 1) opts = options(const_cast(prhs[1])); opts.print(); // Load the factorized model factorized_model model; build_factorized_model(model, matlab_factors); std::cout << "Finished Loading Factors" << std::endl; flush_screen(); if(opts.save_alchemy) { std::cout << "Saving Alchemy file \"problem.alchemy\"" << std::endl; model.save_alchemy("problem.alchemy"); } // mexPrintf("Finished Saving Alchemy File\n"); // Set the global factors SHARED_FACTORS_PTR = &(model.factors()); // Construct the markov random field mrf_gl::core mrf_core; mrf_from_factorized_model(model, mrf_core.graph()); if(opts.alg_type == options::CHROMATIC) { const size_t ncolors(mrf_core.graph().compute_coloring()); std::cout << "Finished coloring graph with " << ncolors << " colors." 
<< std::endl; flush_screen(); mrf_core.set_ncpus(opts.ncpus); mrf_core.set_scheduler_type("chromatic"); mrf_core.set_scope_type("null"); mrf_core.engine().set_sched_yield(false); } // allocate the return matrix for samples matwrap matlab_samples; if(nlhs > 0) { matlab_samples = matwrap::create_matrix(model.variables().size(), opts.nsamples); plhs[0] = matlab_samples.array; safe_assert(!matlab_samples.is_null(), "Error initializing return samples"); } if( !matlab_samples.is_null() ) { double* entries(matlab_samples.get_double_array()); const size_t num_entries(matlab_samples.size()); for(size_t i = 0; i < num_entries; ++i) { entries[i] = std::numeric_limits::quiet_NaN(); } } // allocate the return matrix for nsamples matwrap matlab_nsamples; if(nlhs > 1) { matlab_nsamples = matwrap::create_matrix(model.variables().size(), opts.nsamples); plhs[1] = matlab_nsamples.array; safe_assert(!matlab_nsamples.is_null(), "Error initializing return nsamples"); } if( !matlab_nsamples.is_null() ) { double* entries(matlab_nsamples.get_double_array()); const size_t num_entries(matlab_nsamples.size()); for(size_t i = 0; i < num_entries; ++i) { entries[i] = std::numeric_limits::quiet_NaN(); } } // allocate the return matrix for nsamples matwrap matlab_nchanges; if(nlhs > 2) { matlab_nchanges = matwrap::create_matrix(model.variables().size(), opts.nsamples); plhs[2] = matlab_nchanges.array; safe_assert(!matlab_nchanges.is_null(), "Error initializing return nchanges"); } if( !matlab_nchanges.is_null() ) { double* entries(matlab_nchanges.get_double_array()); const size_t num_entries(matlab_nchanges.size()); for(size_t i = 0; i < num_entries; ++i) { entries[i] = std::numeric_limits::quiet_NaN(); } } // allocate the return cell array for beliefs matwrap matlab_beliefs; if(nlhs > 3) { matlab_beliefs = matwrap::create_cell(model.variables().size(), opts.nsamples); plhs[3] = matlab_beliefs.array; safe_assert(!matlab_beliefs.is_null(), "Error initializing return beliefs"); } if( 
!matlab_beliefs.is_null() ) { //populate each of the entries for(size_t i = 0; i < mrf_core.graph().num_vertices(); ++i) { const mrf_vertex_data& vdata = mrf_core.graph().vertex_data(i); for(size_t j = 0; j < opts.nsamples; ++j) { matwrap blf = matwrap::create_matrix(vdata.variable.size(), 1); safe_assert(!blf.is_null(), "Unable to allocate beliefs"); matlab_beliefs.set_cell_index2d(i, j, blf); } } } //! initialize results collection glshared_collector.set(result_collector(matlab_samples, matlab_beliefs, matlab_nsamples, matlab_nchanges)); // if(!matlab_samples.is_null()) { // std::cout << "Enabling sample collection." << std::endl; // flush_screen(); // last_tic = graphlab::lowres_time_seconds(); const size_t sync_interval = opts.nskip * mrf_core.graph().num_vertices(); mrf_core.set_sync(glshared_collector, collector_sync, collector_apply, graphlab::any(size_t(0)), sync_interval, collector_merge); //} if(opts.alg_type == options::CHROMATIC) { run_chromatic_sampler(mrf_core, opts); } else if( opts.alg_type == options::SPLASH) { run_jt_splash_sampler(mrf_core, opts); } else { mexErrMsgTxt("Invalid algorithm type.\n"); } } // end of main ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/matlab/ind2asg.m ================================================ function asg = ind2asg(siz, ndx) n = length(siz); asg = zeros(1,n); ndx = ndx - 1; for i = 1:n asg(i) = mod(ndx, siz(i)); ndx = floor(ndx / siz(i)); end asg = asg + 1; end ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/matlab/matwrap.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS
* IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*
*
*/
#ifndef MATWRAP
#define MATWRAP

// NOTE(review): the angle-bracketed header names below were lost during
// extraction; the two bare #include lines need their original <...> targets
// restored before this header can compile.
#include
#include
#include "mex.h"

/**
 * A safe assertion for matlab: on failure, raises a MATLAB error via
 * mexErrMsgTxt (which aborts the running mex function) instead of
 * calling abort() like a plain assert().
 */
void safe_assert(const bool value, const char* msg) {
  if(!value) { mexErrMsgTxt(msg); }
  // if( __builtin_expect(!value, 0) ) { mexErrMsgTxt(msg); }
};

// Ask MATLAB to process pending graphics/console events so any queued
// output becomes visible immediately.
void flush_screen() { mexEvalString("drawnow"); }

/**
 * A convenient wrapper around mxArray objects.  Adds null / type checking
 * around the raw MEX C API.  Does NOT own the wrapped mxArray; its lifetime
 * is managed by MATLAB's memory manager.
 */
struct matwrap {
  mxArray* array;

  matwrap(mxArray* array = NULL) : array(array) { }

  bool is_null() const { return array == NULL; }

  // Look up an object property by name (element index 0); raises a MATLAB
  // error if the property does not exist.
  matwrap get_property(const char* property) const {
    safe_assert(array != NULL, "dereferenced null mxArray");
    mxArray* result(mxGetProperty(array, 0, property));
    if(result == NULL) {
      char buffer[256];
      // NOTE(review): sprintf into a fixed 256-byte buffer could overflow
      // for a very long property name; snprintf would be safer.
      sprintf(buffer, "Invalid property %s\n", property);
      mexErrMsgTxt(buffer);
    }
    return matwrap(result);
  } // end of get property

  // Raw data pointer reinterpreted as T*.
  // NOTE(review): extraction stripped the template parameter list here
  // (presumably template<typename T>) and the <T*> argument on
  // reinterpret_cast -- restore before building.
  template T* get_data() {
    safe_assert(array != NULL, "dereferenced null mxArray");
    return reinterpret_cast(mxGetData(array));
  }

  // Pointer to the real (double) data of the array.
  double* get_double_array() {
    safe_assert(array != NULL, "dereferenced null mxArray");
    return mxGetPr(array);
  }

  // Column-major 2d indexing into the real (double) data.
  double& mat_index2d(const size_t i, const size_t j) {
    safe_assert(array != NULL, "dereferenced null mxArray");
    return mxGetPr(array)[ i + j*rows()];
  }

  // Store `contents` at position (i, j) of a cell array (column-major).
  void set_cell_index2d(const size_t i, const size_t j, matwrap contents) {
    safe_assert(array != NULL, "dereferenced null mxArray");
    mxSetCell(array, i + j*rows(), contents.array);
  }

  // Fetch the cell at position (i, j) of a cell array (column-major).
  matwrap get_cell_index2d(const size_t i, const size_t j) {
    safe_assert(array != NULL, "dereferenced null mxArray");
    return mxGetCell(array, i + j*rows());
  }

  // Struct field access by name (element 0).
  matwrap get_field(const char* fieldname) const
  {
    safe_assert(is_struct(), "Attempted to access field of a non-struct element.");
    return mxGetField(array,0,fieldname);
  }

  int get_number_of_fields() const { return mxGetNumberOfFields(array); }

  // Struct field access by field index (element 0).
  matwrap get_field(const int field_id) const {
    safe_assert(is_struct(), "Attempted to access field of a non-struct element.");
    safe_assert(field_id < get_number_of_fields(), "Invalid field id!");
    return mxGetFieldByNumber(array,0,field_id);
  }

  // Map a field name to its numeric field index.
  int get_field_number(const char* field_name) const {
    safe_assert(is_struct(), "Attempted to access field of a non-struct element.");
    return mxGetFieldNumber(array, field_name);
  }

  bool is_class(const char* classname) const {
    safe_assert(array != NULL, "dereferenced null mxArray");
    safe_assert(classname != NULL, "Invalid classname argument");
    return mxIsClass(array, classname);
  } // end of is class

  const char* get_classname() const {
    safe_assert(array != NULL, "dereferenced null mxArray");
    return mxGetClassName(array);
  } // end of is class

  // Total number of elements (product of all dimensions).
  size_t size() const {
    safe_assert(array != NULL, "dereferenced null mxArray");
    return mxGetNumberOfElements(array);
  }

  // Fetch a cell by linear index; asserts the array really is a cell array.
  matwrap get_cell(size_t index) const {
    safe_assert(array != NULL, "dereferenced null mxArray");
    safe_assert(mxGetClassID(array) == mxCELL_CLASS,
                "Attempted to access a cell in a non-cell array.");
    return mxGetCell(array, index);
  }

  bool is_cell() const {
    safe_assert(array != NULL, "dereferenced null mxArray");
    return mxIsCell(array);
  }

  bool is_struct() const {
    safe_assert(array != NULL, "dereferenced null mxArray");
    return mxIsStruct(array);
  }

  bool is_double() const {
    safe_assert(array != NULL, "dereferenced null mxArray");
    return mxGetClassID(array) == mxDOUBLE_CLASS;
  }

  bool is_uint32() const {
    safe_assert(array != NULL, "dereferenced null mxArray");
    return mxGetClassID(array) == mxUINT32_CLASS;
  }

  bool is_string() const {
    safe_assert(array != NULL, "dereferenced null mxArray");
    return mxGetClassID(array) == mxCHAR_CLASS;
  }

  // Copy the char array into str_buffer (NUL terminated, truncated to
  // buffer_len).  NOTE(review): unlike the other accessors this does not
  // null-check `array` before use.
  void as_string(char* str_buffer, size_t buffer_len) {
safe_assert(str_buffer != NULL, "NULL string buffer"); int error = mxGetString(array, str_buffer, buffer_len); safe_assert(!error, "Error processing string!"); } const mwSize* get_dimensions() const { safe_assert(array != NULL, "dereferenced null mxArray"); return mxGetDimensions(array); } size_t rows() const { safe_assert(array != NULL, "dereferenced null mxArray"); return mxGetM(array); } size_t cols() const { safe_assert(array != NULL, "dereferenced null mxArray"); return mxGetN(array); } const mwSize get_num_dimensions() const { safe_assert(array != NULL, "dereferenced null mxArray"); return mxGetNumberOfDimensions(array); } static matwrap create_matrix(size_t m, size_t n) { return matwrap(mxCreateDoubleMatrix(m, n, mxREAL)); } static matwrap create_cell(size_t m, size_t n) { return matwrap(mxCreateCellMatrix(m, n)); } }; #endif ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/matlab/table_factor.m ================================================ %% Construct a discrete table factor % % factor = table_factor(vars, logP) % % vars: array of variable ids (e.g., [1,2,4] ) % logP: tensor representing the log potential values (e.g., ones(3,7,2) % where variable 1 takes on 3 states variable 2 takes on 7 states and % variable 4 takes on 2 states. % % A table factor represents a factor or potential over a small set of % discrete variables. 
Forexample if we wanted to encode a similarity % funciton over a pair of variables x1 and x2 we could define the table % factor: % % psi(x1,x2) = exp( |x1 - x2| ) % % Assuming x1 and x2 take on 4 and 3 values respectively we could define % the matrix (table) representation of psi: % % 0 1 2 % tbl = exp( 1 0 1 ) % 2 1 0 % 3 2 1 % % We can build a table factor representing this as: % % factor = table_factor([1, 2], tbl); % function factor = table_factor(vars, data) factor.vars = sort(uint32(vars)); factor.logP = double(data); end %% % Originally I had hoped to used a matlab class but unfortunatley mex % support for classes is limited resulting in substantial performance % penalties when accessing fields. % classdef table_factor % properties (SetAccess = private) % variables; % end % properties % logP; % end % methods % %% the variables should be a d dimensional array % function obj = table_factor(vars, data) % obj.variables = uint32(vars); % obj.logP = double(data); % end % % end % % end ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/matlab/tests/denoise_test.m ================================================ clear; %% Build the factors rows = 100; cols = 100; states = 5; lambdaSmooth = 1.5; % Laplace smoothing parameter noiseP = 0.3; % proportion of randomly sampled values [factors, img, noisy_img] = ... make_grid_model(rows, cols, states, lambdaSmooth, noiseP); %% options.alg_type = 'CHROMATIC'; options.nsamples = 50; options.nskip = 10; options.ncpus = 4; disp('---------------'); [samples, nupdates, nchanges, marginals] = ... gibbs_sampler(factors, options); %% Take the last set of beliefs and compute the exected value for i = 1:options.nsamples % pred_img = reshape(cellfun(@(x) (1:length(x)) * x, marginals(:,i)), ... % rows, cols); pred_img = reshape(arrayfun(@(x) (1:length(x)) * x, samples(:,i)), ... 
rows, cols); figure(1);subplot(1,3,3); colormap('gray'); imagesc(pred_img); title(['Sample Image ', num2str(i)]); end %% Render the final marginal expectations pred_img = reshape(cellfun(@(x) (1:length(x)) * x, marginals(:,i)), ... rows, cols); figure(2); colormap('gray'); imagesc(pred_img); title('Expected Pixel Marginals'); ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/matlab/tests/make_grid_model.m ================================================ %% This code generates a grid model function [factors, img, noisy_img] = make_grid_model(rows, cols, states, ... lambdaSmooth, noiseP) % Create a virtual image [u,v] = meshgrid(linspace(0,1,rows), linspace(0,1,cols)); img = (1 + cos(1./sqrt((u-.5).^2 + (v-.5).^2)) )/2 + u.^2; img = (img - min(img(:)))/(max(img(:)) - min(img(:))); img = (states - 1) * img; img = round(img) + 1; figure(1); clf();subplot(1,3,1); colormap('gray'); imagesc(img); title('Original Image'); % add noise mask = rand(rows,cols) < noiseP; noise = ceil(states*rand(rows,cols)); noisy_img = mask .* noise + ~mask .* img; figure(1); subplot(1,3,2); colormap('gray'); imagesc(noisy_img); title('Noisy Image'); % Build the edge factor table (in log form) [u,v] = meshgrid(1:states, 1:states); edgetbl = -lambdaSmooth * abs(u - v); % Build the node factor tableS based on the noise model nodetbls = zeros(rows*cols, states) + ... noiseP/(states - 1); ind = sub2ind([rows * cols, states], (1:(rows*cols))', noisy_img(:)); nodetbls(ind) = 1-noiseP; nodetbls = log(nodetbls); % Get all the edges and variables vars = 1:(rows*cols); gridvars = reshape(vars, rows, cols); edges = ... [reshape(gridvars(1:(end-1),:), (rows-1) * cols,1), ... reshape(gridvars(2:end,:), (rows-1) * cols,1); ... reshape(gridvars(:,1:(end-1)), rows * (cols-1), 1), ... 
reshape(gridvars(:,2:end), rows * (cols-1),1)]; % construct the actual factors factors = cell(length(vars) + length(edges), 1); index = 1; for i = 1:length(vars) factors{index} = table_factor(vars(i), nodetbls(vars(i),:)); index = index + 1; end index = length(vars)+1; for i = 1:length(edges) factors{index} = table_factor(sort(edges(i,:)), edgetbl); index = index + 1; end end ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/matlab/tests/small_test.m ================================================ clear; RandStream.setDefaultStream(RandStream.create('mt19937ar', 'seed', 5849)) smoothing = 0.000001; var_sizes = [3,5,3,2,4]; vars = [1,2,3]; factors{1} = table_factor(vars, log(rand(var_sizes(vars)) + smoothing)); vars = [2,3,4]; factors{2} = table_factor(vars, log(rand(var_sizes(vars)) + smoothing)); vars = [1,3,5]; factors{3} = table_factor(vars, log(rand(var_sizes(vars)) + smoothing)); vars = [4,5]; factors{4} = table_factor(vars, log(rand(var_sizes(vars)) + smoothing)); maxasg = prod(var_sizes); joint = zeros(var_sizes); %% compute joint for i = 1:maxasg asg = ind2asg(var_sizes, i); for j = 1:length(factors) subi = asg2ind(var_sizes(factors{j}.vars), asg(factors{j}.vars)); joint(i) = joint(i) + factors{j}.logP(subi); end end P = exp(joint) / sum(exp(joint(:))); %% run the Chromatic Sampler options.alg_type = 'CHROMATIC'; options.nsamples = 1000; options.nskip = 100; options.ncpus = 1; samples = gibbs_sampler(factors, options); P_est = zeros(var_sizes); for i = 1:options.nsamples ind = asg2ind(var_sizes, samples(:,i)'); P_est(ind) = P_est(ind) + 1; end P_est = P_est ./ sum(P_est(:)); error = abs(P_est - P); disp(['Chromatic error: ', num2str(max(error(:)))]); %% run the sampler options.alg_type = 'SPLASH'; options.nsamples = 1000; options.nskip = 100; options.ncpus = 1; options.treewidth=5; samples = gibbs_sampler(factors, options); P_est = zeros(var_sizes); for i = 1:options.nsamples ind = asg2ind(var_sizes, 
samples(:,i)'); P_est(ind) = P_est(ind) + 1; end P_est = P_est ./ sum(P_est(:)); error = abs(P_est - P); disp(['Splash error: ', num2str(max(error(:)))]); ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/matlab/tests/small_test2.m ================================================ clear; RandStream.setDefaultStream(RandStream.create('mt19937ar', 'seed', 5849)) % Construct strong agreement potentials nstates = 5; var_sizes = ones(1,5)*nstates; tbl = log(eye(nstates) + 0.001); % Construct a set of factors factors{1} = table_factor([1,2], tbl); factors{2} = table_factor([1,3], tbl); factors{3} = table_factor([1,4], tbl); factors{4} = table_factor([2,4], tbl); factors{5} = table_factor([2,5], tbl); factors{6} = table_factor([3,4], tbl); factors{7} = table_factor([4,5], tbl); maxasg = prod(var_sizes); joint = zeros(var_sizes); %% compute joint for i = 1:maxasg asg = ind2asg(var_sizes, i); for j = 1:length(factors) subi = asg2ind(var_sizes(factors{j}.vars), asg(factors{j}.vars)); joint(i) = joint(i) + factors{j}.logP(subi); end end P = exp(joint) / sum(exp(joint(:))); %% run the Chromatic Sampler options.alg_type = 'CHROMATIC'; options.nsamples = 1000; options.nskip = 100; options.ncpus = 1; samples = gibbs_sampler(factors, options); P_est = zeros(var_sizes); for i = 1:options.nsamples ind = asg2ind(var_sizes, samples(:,i)'); P_est(ind) = P_est(ind) + 1; end P_est = P_est ./ sum(P_est(:)); error = abs(P_est - P); disp(['Chromatic error: ', num2str(max(error(:)))]); %% run the sampler options.alg_type = 'SPLASH'; options.nsamples = 1000; options.nskip = 10; options.ncpus = 1; options.treewidth=5; samples = gibbs_sampler(factors, options); P_est = zeros(var_sizes); for i = 1:options.nsamples ind = asg2ind(var_sizes, samples(:,i)'); P_est(ind) = P_est(ind) + 1; end P_est = P_est ./ sum(P_est(:)); error = abs(P_est - P); disp(['Splash error: ', num2str(max(error(:)))]); ================================================ 
FILE: toolkits/graphical_models/deprecated/gibbs_sampling/mrf.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include "mrf.hpp" #include "util.hpp" #include "image.hpp" #include "global_variables.hpp" #include /** Save the beliefs stored in the graph */ void save_beliefs(const mrf_graph_type& mrf, const std::string& filename) { std::ofstream fout(filename.c_str()); fout.precision(16); factor_t marginal; for(size_t v = 0; v < mrf.num_vertices(); ++v) { const mrf_vertex_data& vdata = mrf.vertex_data(v); marginal = vdata.belief; marginal.normalize(); fout << vdata.nsamples << '\t'; size_t arity = marginal.args().var(0).size(); for(size_t asg = 0; asg < arity; ++asg) { fout << std::exp( marginal.logP(asg) ); if(asg + 1 < arity ) fout << '\t'; } fout << '\n'; } fout.close(); } // End of save beliefs void save_asg(const mrf_graph_type& mrf, const std::string& filename) { std::ofstream fout(filename.c_str()); for(size_t v = 0; v < mrf.num_vertices(); ++v) fout << mrf.vertex_data(v).asg << '\n'; fout.close(); } // End of save beliefs /** Construct an MRF from the factorized model */ void mrf_from_factorized_model(const factorized_model& model, mrf_graph_type& mrf) { typedef mrf_graph_type::vertex_id_type vertex_id_type; typedef mrf_graph_type::edge_id_type edge_id_type; ///====================================================================== // Add all 
the variables factor_t conditional, belief; foreach(variable_t variable, model.variables()) { mrf_vertex_data vdata(variable, model.factor_ids(variable)); { // Construct a uniformly random initial assignment assignment_t asg(vdata.variable); asg.uniform_sample(); vdata.asg = asg.asg_at(0); double& logP = vdata.belief.logP(vdata.asg); logP = log(exp(logP) + 1.0); } // { // construct mode center initial assignment // belief.set_args(variable); // belief.uniform(); // conditional.set_args(variable); // const std::set& factor_ids = model.factor_ids(variable); // foreach(vertex_id_t fid, factor_ids) { // conditional.marginalize(model.factors()[fid]); // belief *= conditional; // } // belief.normalize(); // assignment_t asg = belief.sample(); // vdata.asg = asg.asg_at(0); // double& logP = vdata.belief.logP(vdata.asg); // logP = log(exp(logP) + 1.0); // } const vertex_id_type vid = mrf.add_vertex(vdata); // We require variable ids to match vertex id (this simplifies a // lot of stuff). ASSERT_EQ(vid, variable.id()); } ASSERT_EQ(mrf.num_vertices(), model.variables().size()); ///====================================================================== // Add all the edges const factorized_model::factor_map_t& factors(model.factors()); for(vertex_id_type vid = 0; vid < mrf.num_vertices(); ++vid) { const mrf_vertex_data& vdata = mrf.vertex_data(vid); // Compute all the neighbors of this vertex by looping over all // the variables in all the factors that contain this vertex std::set neighbors; foreach(const factor_id_t fid, vdata.factor_ids) { const domain_t& args = factors[fid].args(); for(size_t n = 0; n < args.num_vars(); ++n) { variable_t neighbor_var = args.var(n); if(vdata.variable != neighbor_var ) neighbors.insert(neighbor_var); } } // For each of those variables add an edge from this varaible to // that variable foreach(const variable_t neighbor_variable, neighbors) { const vertex_id_type neighbor_vid = neighbor_variable.id(); mrf_edge_data edata; mrf.add_edge(vid, 
neighbor_vid, edata);
    }
  } // loop over factors
  mrf.finalize();
} // End of construct_mrf

//! Compute the unnormalized log-likelihood of the current joint assignment:
//! the sum over every factor of log P at the factor's variables' current
//! sampled values.  Reads the factor table through the global
//! SHARED_FACTORS_PTR (the commented-out lines show the older shared-data
//! access path it replaced).
double unnormalized_loglikelihood(const mrf_graph_type& mrf) {
  typedef mrf_graph_type::vertex_id_type vertex_id_type;
  double sum = 0;
  // size_t num_factors = SHARED_FACTORS.get().size();
  size_t num_factors = SHARED_FACTORS_PTR->size();
  // Sum the logprob of each factor
  for(factor_id_t fid = 0; fid < num_factors; ++fid) {
    // const factor_t& factor(SHARED_FACTORS.get()[fid]);
    const factor_t& factor((*SHARED_FACTORS_PTR)[fid]);
    // Accumulate the assignments: build the joint assignment over this
    // factor's domain from each vertex's current sample.
    domain_t dom = factor.args();
    assignment_t asg;
    for(size_t i = 0; i < dom.num_vars(); ++i) {
      const vertex_id_type vid = dom.var(i).id();
      const mrf_vertex_data& vdata = mrf.vertex_data(vid);
      // Vertex ids are required to equal variable ids (enforced when the
      // MRF is constructed), so this lookup is valid.
      ASSERT_EQ(vdata.variable, dom.var(i));
      asg &= assignment_t(vdata.variable, vdata.asg);
    }
    sum += factor.logP(asg);
  }
  return sum;
}

//! Render diagnostic .pgm images of the MRF state.  Assumes the variables
//! form a square grid (rows = sqrt(num_vertices)) -- TODO confirm behavior
//! for non-square models.  Emits: expected value per pixel ("_pred_"),
//! per-vertex sample counts ("_updates_"), an unsampled-vertex mask
//! ("_unsampled_"), and (continuing past this span) the final assignment.
void draw_mrf(const size_t experiment_id,
              const std::string& base_name,
              const mrf_graph_type& mrf) {
  typedef mrf_graph_type::vertex_id_type vertex_id_type;
  size_t rows = std::sqrt(mrf.num_vertices());
  std::cout << "Rows: " << rows << std::endl;
  image img(rows, rows);
  // NOTE(review): extraction stripped the element type here
  // (presumably std::vector<double>) -- restore before building.
  std::vector values(1);
  factor_t belief;
  for(vertex_id_type vid = 0; vid < mrf.num_vertices(); ++vid) {
    const mrf_vertex_data& vdata = mrf.vertex_data(vid);
    belief = vdata.belief;
    belief.normalize();
    belief.expectation(values);
    img.pixel(vid) = values[0];
  }
  // Pin two pixels to the extreme values so the grayscale range stays
  // comparable across output images.
  img.pixel(0) = 0;
  img.pixel(1) = mrf.vertex_data(0).variable.size()-1;
  img.save(make_filename(base_name + "_pred_", ".pgm",
                         experiment_id).c_str());
  for(vertex_id_type vid = 0; vid < mrf.num_vertices(); ++vid) {
    img.pixel(vid) = mrf.vertex_data(vid).nsamples;
  }
  img.save(make_filename(base_name + "_updates_", ".pgm",
                         experiment_id).c_str());
  for(vertex_id_type vid = 0; vid < mrf.num_vertices(); ++vid) {
    img.pixel(vid) = mrf.vertex_data(vid).nsamples == 0;
  }
  img.save(make_filename(base_name + "_unsampled_", ".pgm",
experiment_id).c_str()); for(vertex_id_type vid = 0; vid < mrf.num_vertices(); ++vid) { img.pixel(vid) = mrf.vertex_data(vid).asg; } img.pixel(0) = 0; img.pixel(1) = mrf.vertex_data(0).variable.size()-1; img.save(make_filename(base_name + "_final_sample_", ".pgm", experiment_id).c_str()); } // end of draw_mrf #include ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/mrf.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef PGIBBS_MRF_HPP #define PGIBBS_MRF_HPP /** * * This code is ued to represent a markov random field * * \author Joseph Gonzalez */ // INCLUDES ===================================================================> #include #include #include #include #include #include #include #include #include #include "factorized_model.hpp" struct mrf_vertex_data { //! Problem specific variables variable_t variable; //! current assignment size_t asg; //! The vector of factor_ids associated with this vertex std::vector factor_ids; //! Current belief estimate factor_t belief; //! The number of times this vertex has been sampled size_t nsamples; //! The number of itmes this vertex has changed its value size_t nchanges; //! 
Properties associated with the tree struct tree_info_type { double priority; size_t tree_id; size_t height; bool in_tree; tree_info_type () : priority(-1), tree_id(-1), height(0), in_tree(false) { } void save(graphlab::oarchive& arc) const { arc << in_tree << tree_id << height << priority; } void load(graphlab::iarchive& arc) { arc >> in_tree >> tree_id >> height >> priority; } }; //! tree info tree_info_type tree_info; mrf_vertex_data() : asg(0), nsamples(0), nchanges(0) { } mrf_vertex_data(const variable_t& variable, const std::set& factor_ids_set) : variable(variable), asg(0), factor_ids(factor_ids_set.begin(), factor_ids_set.end()), belief(domain_t(variable)), nsamples(0), nchanges(0) { // Initialize the belief to "0" belief.uniform(-std::numeric_limits::max()); // Require that factor ids be non empty ASSERT_FALSE(factor_ids.empty()); } void save(graphlab::oarchive& arc) const { arc << variable << asg << factor_ids << belief << nsamples << nchanges << tree_info; } void load(graphlab::iarchive& arc) { arc >> variable >> asg >> factor_ids >> belief >> nsamples >> nchanges >> tree_info; } }; // End of mrf vertex data /** * The data associated with each directed edge in the pairwise markov * random field */ struct mrf_edge_data { // Currently empty void save(graphlab::oarchive &arc) const { } void load(graphlab::iarchive &arc) { } }; typedef graphlab::graph< mrf_vertex_data, mrf_edge_data> mrf_graph_type; /** Save the beliefs stored in the graph */ void save_beliefs(const mrf_graph_type& mrf, const std::string& filename); void save_asg(const mrf_graph_type& mrf, const std::string& filename); /** Construct an MRF from the factorized model */ void mrf_from_factorized_model(const factorized_model& model, mrf_graph_type& mrf); //! 
Compute the unormalized likelihood of the current assignment double unnormalized_loglikelihood(const mrf_graph_type& mrf); void draw_mrf(const size_t experiment_id, const std::string& base_name, const mrf_graph_type& mrf); #endif ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/pgibbs_tls.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* *
*/
#include "pgibbs_tls.hpp"

// Process-wide pthread key under which each thread's pgibbs_tls block is
// registered.
pthread_key_t pgibbs_tls_key;

// Allocate the calling thread's TLS block and associate it with
// pgibbs_tls_key.  Asserts that no block was previously associated, so this
// must only run once per thread (get_pgibbs_tls enforces that).
pgibbs_tls* create_pgibbs_tls() {
  ASSERT_EQ(pthread_getspecific(pgibbs_tls_key), NULL);
  pgibbs_tls* data = new pgibbs_tls();
  ASSERT_NE(data, NULL);
  pthread_setspecific(pgibbs_tls_key, data);
  return data;
}

// Return the calling thread's TLS block, lazily creating it on first use.
// NOTE(review): extraction stripped the reinterpret_cast template arguments
// in this file (presumably <pgibbs_tls*>) -- restore before building.
pgibbs_tls& get_pgibbs_tls() {
  pgibbs_tls* tls =
    reinterpret_cast (pthread_getspecific(pgibbs_tls_key) );
  // If no tsd has been associated with this thread yet, create one
  if(tls == NULL) tls = create_pgibbs_tls();
  ASSERT_NE(tls, NULL);
  return *tls;
}

// Destructor callback handed to pthread_key_create; pthreads invokes it with
// the thread's stored value when the thread exits.
void destroy_pgibbs_tls(void* ptr) {
  pgibbs_tls* tls = reinterpret_cast(ptr);
  if(tls != NULL) delete tls;
}

// Static-initializer trick: constructing this const object at load time
// creates the pthread key exactly once, before any thread can call
// get_pgibbs_tls().
struct pgibbs_tls_key_creater {
  pgibbs_tls_key_creater( ) {
    pthread_key_create(&pgibbs_tls_key, destroy_pgibbs_tls);
  }
};
static const pgibbs_tls_key_creater make_pgibbs_tls_key;
================================================
FILE: toolkits/graphical_models/deprecated/gibbs_sampling/pgibbs_tls.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 *
 */
#ifndef PGIBBS_TLS_HPP
#define PGIBBS_TLS_HPP
/**
 *
 * This code is used to represent thread local storage needed in some
 * of the sampler code
 *
 * \author Joseph Gonzalez
 */
// INCLUDES ===================================================================>
// NOTE(review): extraction stripped the header name from the bare #include
// below (presumably <pthread.h>) -- restore before building.
#include
#include "factorized_model.hpp"
// //! Key used to get the pgibbs tls
// extern pthread_key_t pgibbs_tls_key;
//!
// Local state available to each thread
// (the leading "//!" of this comment sits at the end of the previous line;
// the extraction split the original doc comment at this point).
//
// Per-thread scratch factors -- presumably reused across sampling updates to
// avoid reallocating factor tables on every step; verify against the sampler
// code that fills them.
struct pgibbs_tls {
  factor_t cavity;
  factor_t conditional_factor;
  factor_t belief;
  factor_t tmp_belief;
};

// Accessor for the calling thread's TLS block (defined in pgibbs_tls.cpp;
// lazily creates the block on first use).
pgibbs_tls& get_pgibbs_tls();

#endif
================================================
FILE: toolkits/graphical_models/deprecated/gibbs_sampling/run_statistics.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 *
 */
#ifndef PGIBBS_RUN_STATISTICS_HPP
#define PGIBBS_RUN_STATISTICS_HPP

#include "mrf.hpp"

//!
// Statistics associated with a run
// (the leading "//!" of this comment sits at the end of the previous line;
// the extraction split the original doc comment at this point).
struct run_statistics {
  size_t nsamples;     // total samples drawn, summed over all vertices
  size_t nchanges;     // total assignment changes, summed over all vertices
  double loglik;       // unnormalized log-likelihood of the current assignment
  size_t min_samples;  // fewest samples at any single vertex
  size_t max_samples;  // most samples at any single vertex

  // Empty statistics (min_samples starts at the max value so the running
  // minimum works).  NOTE(review): extraction stripped the template argument
  // on std::numeric_limits (presumably <size_t>) -- restore before building.
  run_statistics() :
    nsamples(0), nchanges(0), loglik(0.0),
    min_samples(std::numeric_limits::max()),
    max_samples(0) { }

  // Aggregate statistics over every vertex of the given MRF.
  run_statistics(const mrf_graph_type& mrf) :
    nsamples(0), nchanges(0), loglik(0.0),
    min_samples(std::numeric_limits::max()),
    max_samples(0) {
    typedef mrf_graph_type::vertex_id_type vertex_id_type;
    // Compute the unnormalized log likelihood
    loglik = unnormalized_loglikelihood(mrf);
    for(vertex_id_type vid = 0; vid < mrf.num_vertices(); ++vid) {
      const mrf_vertex_data& vdata = mrf.vertex_data(vid);
      nsamples += vdata.nsamples;
      nchanges += vdata.nchanges;
      min_samples = std::min(min_samples, vdata.nsamples);
      max_samples = std::max(max_samples, vdata.nsamples);
    } // end of for loop
  } // end of compute run statistics

  // Dump all fields to stdout, one per line.
  void print() const {
    std::cout << "nsamples: " << nsamples << std::endl
              << "nchanges: " << nchanges << std::endl
              << "loglik: " << loglik << std::endl
              << "min_samples: " << min_samples << std::endl
              << "max_samples: " << max_samples << std::endl;
  }
};

#endif
================================================
FILE: toolkits/graphical_models/deprecated/gibbs_sampling/sampler.cpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
* * */ /** * * This program runs the various gibbs samplers * * \author Joseph Gonzalez */ // INCLUDES ===================================================================> // Including Standard Libraries #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // Image reading/writing code #include "util.hpp" #include "factorized_model.hpp" #include "mrf.hpp" #include "junction_tree.hpp" #include "chromatic_sampler.hpp" #include "jt_splash_sampler.hpp" #include "global_variables.hpp" // Include the macro for the foreach operation #include // Results files =============================================================> const std::string chromatic_results_fn = "chromatic_results.tsv"; const std::string async_results_fn = "async_results.tsv"; const std::string splash_results_fn = "splash_results.tsv"; const std::string jtsplash_results_fn = "jtsplash_results.tsv"; // Command Line Arguments ====================================================> std::string model_filename; std::string experiment_type = "chromatic"; std::vector runtimes(1, 10); bool draw_images = false; size_t treesize = 1000; size_t treewidth = 3; size_t treeheight = std::numeric_limits::max(); size_t factorsize = std::numeric_limits::max(); size_t subthreads = 1; bool priorities = false; // MAIN =======================================================================> int main(int argc, char** argv) { // set the global logger global_logger().set_log_level(LOG_WARNING); global_logger().set_log_to_console(true); // std::srand ( graphlab::timer::usec_of_day() ); // graphlab::random::seed(); // Parse command line arguments ---------------------------------------------> graphlab::command_line_options clopts; clopts.attach_option("model", &model_filename, model_filename, "model file name"); clopts.add_positional("model"); clopts.attach_option("experiment", &experiment_type, experiment_type, "the 
type of experiment to run " "{chromatic, jtsplash}"); clopts.add_positional("experiment"); clopts.attach_option("runtimes", &runtimes, runtimes, "total runtime in seconds"); clopts.attach_option("draw_images", &draw_images, draw_images, "draw pictures (assume sqrt(numvert) rows)"); clopts.attach_option("treesize", &treesize, treesize, "The maximum number of variables in a junction tree"); clopts.attach_option("treewidth", &treewidth, treewidth, "The maximum treewidth."); clopts.attach_option("treeheight", &treeheight, treeheight, "The maximum height of the trees. "); clopts.attach_option("factorsize", &factorsize, factorsize, "The maximum factorsize"); clopts.attach_option("subthreads", &subthreads, subthreads, "The number of threads to use inside each tree " "(zero means not used)"); clopts.attach_option("priorities", &priorities, priorities, "Use priorities?"); // Set defaults for scope and scheduler clopts.set_scheduler_type("fifo"); clopts.set_scope_type("edge"); if( !clopts.parse(argc, argv) ) { std::cout << "Error parsing command line arguments!" << std::endl; return EXIT_FAILURE; } std::cout << "Application Options" << std::endl; std::cout << "model: " << model_filename << std::endl << "experiment: " << experiment_type << std::endl << "runtime: " << boost::lexical_cast(runtimes) << std::endl; std::cout << "Graphlab Options" << std::endl; clopts.print(); // create model filename std::cout << "Load alchemy file." << std::endl; factorized_model factor_graph; factor_graph.load_alchemy(model_filename); // Set the global factors //SHARED_FACTORS.set(factor_graph.factors()); SHARED_FACTORS_PTR = &(factor_graph.factors()); std::cout << "Building graphlab MRF GraphLab core." << std::endl; mrf_gl::core mrf_core; mrf_from_factorized_model(factor_graph, mrf_core.graph()); mrf_core.set_engine_options(clopts); std::cout << "Computing coloring." 
<< std::endl; size_t colors = mrf_core.graph().compute_coloring(); std::cout << "Colors: " << colors << std::endl; // Create synthetic images --------------------------------------------------> if(experiment_type == "chromatic") { run_chromatic_sampler(mrf_core, chromatic_results_fn, runtimes, draw_images); } if(experiment_type == "jtsplash") { splash_settings settings; settings.ntrees = mrf_core.get_engine_options().get_ncpus(); settings.max_tree_size = treesize; settings.max_tree_height = treeheight; settings.max_tree_width = treewidth; settings.max_tree_height = treeheight; settings.priorities = priorities; settings.subthreads = subthreads; run_jtsplash_sampler(mrf_core.graph(), jtsplash_results_fn, runtimes, draw_images, settings); } else { std::cout << "Invalid experiment type!" << std::endl; return EXIT_FAILURE; } std::cout << "Done!" << std::endl; return EXIT_SUCCESS; } // End of main #include ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/util.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #include #include #include #include #include #include #include #include #include "util.hpp" size_t file_line_count(const std::string& experiment_file) { std::ifstream fin(experiment_file.c_str()); size_t lines = 0; std::string line; while(getline(fin, line)) lines++; fin.close(); return lines; } std::string make_filename(const std::string& base, const std::string& suffix, const size_t number) { std::stringstream strm; strm << base << std::setw(10) << std::setfill('0') << number << suffix; std::cout << strm.str() << std::endl; return strm.str(); } ================================================ FILE: toolkits/graphical_models/deprecated/gibbs_sampling/util.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef PGIBBS_UTIL_HPP #define PGIBBS_UTIL_HPP #include //! Get the number of lines in the file size_t file_line_count(const std::string& experiment_file); //! 
make a filename from base sufix and number std::string make_filename(const std::string& base, const std::string& suffix, const size_t number); #endif ================================================ FILE: toolkits/graphical_models/deprecated/kernelbp/CMakeLists.txt ================================================ project(GraphLab) ================================================ FILE: toolkits/graphical_models/deprecated/kernelbp/old/BallTreeDensity.cpp ================================================ /* Copyright (c) 2003 Alexander Ihler * Original code from: http://www.ics.uci.edu/~ihler/code/index.html * * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
*/

//
// Matlab MEX interface for KD-tree C++ functions
//
// Written by Alex Ihler and Mike Mandel
// Copyright (C) 2003 Alexander Ihler
//

//#define MEX
#include "cpp/BallTreeDensity.h"
#ifdef MEX
#include "mex.h"
#endif

#ifdef MEX
// MEX entry point: builds a BallTreeDensity (KD-tree based KDE) from the
// matlab inputs and returns it as a single matlab structure.
//   prhs[0..2] : points, weights, bandwidths
//   prhs[3]    : optional kernel type, cast to BallTreeDensity::KernelType
//   plhs[0]    : the resulting density structure
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
  // check for the right number of arguments
  if((nrhs < 3)||(nrhs > 4))
    mexErrMsgTxt("Takes 3-4 input arguments");
  if(nlhs != 1)
    mexErrMsgTxt("Outputs one result (a structure)");

  if (nrhs == 3)  // points, weights, bandwidths
    plhs[0] = BallTreeDensity::createInMatlab(prhs[0],prhs[1],prhs[2]);
  else {          // points, weights, bandwidths,type
    int ktype = (int) mxGetScalar(prhs[3]);
    plhs[0] = BallTreeDensity::createInMatlab(prhs[0],prhs[1],prhs[2],(BallTreeDensity::KernelType) ktype);
  }
}
#else
// Non-MEX builds compile nothing from this translation unit.
#endif


================================================
FILE: toolkits/graphical_models/deprecated/kernelbp/old/CMakeLists.txt
================================================
project(GraphLab)

# if (ITPP-FOUND)
# add_graphlab_executable(nbp denoise.cpp BallTreeDensity.cpp cpp/BallTreeClass.cc cpp/BallTreeDensityClass.cc)
# endif()


================================================
FILE: toolkits/graphical_models/deprecated/kernelbp/old/cpp/BallTree.h
================================================
/* Copyright (c) 2003 Alexander Ihler
 * Original code from: http://www.ics.uci.edu/~ihler/code/index.html
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
*/

//////////////////////////////////////////////////////////////////////////////////////
// BallTree.h -- class definition for a BallTree (actually KD-tree) object
//
// A few functions are defined only for MEX calls (construction & load from matlab)
// Most others can be used more generally.
//
//////////////////////////////////////////////////////////////////////////////////////
//
// Written by Alex Ihler and Mike Mandel
// Copyright (C) 2003 Alexander Ihler
//
//////////////////////////////////////////////////////////////////////////////////////

#ifndef __BALL_TREE_H
#define __BALL_TREE_H

#ifdef MEX
#include "mex.h"
#endif
// NOTE(review): the names of the three standard headers below were lost
// when this file was extracted; restore them from the original source.
#include
#include
#include
#include "../kde.h"

#define FALSE 0
#define TRUE 1

// Forward declarations of the math functions this header relies on.
double log(double);
double exp(double);
double sqrt(double);
double pow(double , double);
double fabs(double);

#define PI 3.141592653589

// Copy an itpp vector into a freshly new[]-allocated double array;
// the caller owns (and must delete[]) the result.
inline double * vec2vec(const itpp::vec * _vec){
  double * ret = new double[_vec->size()];
  memcpy(ret, _vec->_data(), _vec->size() * sizeof(double));
  return ret;
}
// Overload of the above for an itpp matrix, copied in itpp's storage order.
inline double * vec2vec(const itpp::mat * _mat){
  double * ret = new double[_mat->size()];
  memcpy(ret, _mat->_data(), _mat->size() * sizeof(double));
  return ret;
}

// KD-tree ("ball tree") over weighted points.  The node arrays are sized
// 2*num_points; indices >= num_points are leaves (see isLeaf()).
class BallTree {
public:
  //typedef unsigned int index; // define "index" type (long)
  typedef uint32_t index; // define "index" type (long)
  const static BallTree::index NO_CHILD = (index) -1; // indicates no further children

  /////////////////////////////
  // Constructors
  /////////////////////////////
  //BallTree( unsigned int d, index N, double* centers_,
  // double* ranges_, double* weights_ );

  // Default constructor: null arrays / empty tree.
  BallTree(){
    lowest_leaf = NULL; highest_leaf = NULL;
    left_child = NULL; right_child = NULL;
    permutation = NULL; ranges = NULL;
    centers = NULL; weights = NULL;
    dims = 0; num_points = 0; next = 1;
  }
  virtual ~BallTree();

#ifdef MEX
  BallTree(const mxArray* structure); // for loading ball trees from matlab
  // For creating BallTree structures in matlab:
  static mxArray* createInMatlab(const mxArray* pts, const mxArray* wts);
#else
  BallTree(const kde & pkde); // for loading ball trees from matlab
#endif

  /////////////////////////////
  // Accessor Functions
  /////////////////////////////
  BallTree::index root() const { return 0; }
  unsigned int Ndim() const { return dims; }
  index Npts() const { return num_points; }
  // number of leaf points under ball i
  index Npts(BallTree::index i) const { return highest_leaf[i]-lowest_leaf[i]+1; }
  const double* center(BallTree::index i) const { return centers+i*dims; }
  const double* range(BallTree::index i) const { return ranges +i*dims; }
  double weight(BallTree::index i) const { return *(weights+i); }
  bool isLeaf(BallTree::index ind) const { return ind >= num_points; }
  // NOTE(review): `0<=ind` is vacuously true for the unsigned index type;
  // only the upper-bound check is effective here.
  bool validIndex(BallTree::index ind) const { return ((0<=ind) && (ind < 2*num_points)); }
  BallTree::index left(BallTree::index i) const { return left_child[i]; }
  BallTree::index right(BallTree::index i) const { return right_child[i]; }
  BallTree::index leafFirst(BallTree::index i) const { return lowest_leaf[i]; }
  BallTree::index leafLast(BallTree::index i) const { return highest_leaf[i]; }

  // Release any owned arrays and reset to an empty tree.
  void clean();

  // Convert a BallTree::index to the numeric index in the original data
  index getIndexOf(BallTree::index i) const { return permutation[i]; }

  void movePoints(double*);
  void changeWeights(const double *);

  // Test two sub-trees to see which is nearer another BallTree
  BallTree::index closer(BallTree::index, BallTree::index, const BallTree&,BallTree::index) const;
  BallTree::index closer(BallTree::index i, BallTree::index j, const BallTree& other_tree) const
    { return closer(i,j,other_tree,other_tree.root()); };

  void kNearestNeighbors(index *, double *, const double *, int, int) const;

  /////////////////////////////
  // Private class f'ns
  /////////////////////////////
protected:
#ifdef MEX
  static mxArray* matlabMakeStruct(const mxArray* pts, const mxArray* wts);
#endif
  virtual void calcStats(BallTree::index); // construction recursion

  unsigned int dims;          // dimension of data
  BallTree::index num_points; // # of points
  double *centers;  // ball centers, dims numbers per ball
  double *ranges ;  // bounding box ranges, dims per ball, dist from center to one side
  double *weights;  // total weight in each ball

  BallTree::index *left_child , *right_child ; // left, right children; no parent indices
  BallTree::index *lowest_leaf , *highest_leaf; // lower & upper leaf indices for each ball
  BallTree::index *permutation; // point's position in the original data

  BallTree::index next; // internal var for placing the non-leaf nodes

  static const char *FIELD_NAMES[]; // list of matlab structure fields
  static const int nfields;

  // for building the ball tree
  void buildBall(BallTree::index firstLeaf, BallTree::index lastLeaf, BallTree::index root);
  BallTree::index most_spread_coord(BallTree::index, BallTree::index) const;
  BallTree::index partition(unsigned int dim, BallTree::index low, BallTree::index high);
  virtual void swap(BallTree::index, BallTree::index); // leaf-swapping function
  void select(unsigned int dimension, index position, index low, index high);

  double minDist(index, const double*) const;
  double maxDist(index, const double*) const;

  // build the non-leaf nodes from the leaves
  void buildTree();
};

#endif


================================================
FILE: toolkits/graphical_models/deprecated/kernelbp/old/cpp/BallTreeClass.cc
================================================
/* Copyright (c) 2003 Alexander Ihler
 * Original code from: http://www.ics.uci.edu/~ihler/code/index.html
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
See the License for the specific language
 * governing permissions and limitations under the License.
 */

//////////////////////////////////////////////////////////////////////////////////////
// BallTreeClass -- class definitions for a BallTree (actually KD-tree)
// object, primarily for use in matlab MEX files.
//
// See BallTree.h for the class definition.
//
//////////////////////////////////////////////////////////////////////////////////////
//
// Written by Alex Ihler and Mike Mandel
// Copyright (C) 2003 Alexander Ihler
//
//////////////////////////////////////////////////////////////////////////////////////

//#define MEX
// NOTE(review): the standard-header names in the bare #include lines below
// were lost when this file was extracted.
#include
#include "BallTree.h"
#include
#include
#include
#include "../kde.h"

// Matlab structure field names shared by matlabMakeStruct and the
// matlab-loading constructor.
const char* BallTree::FIELD_NAMES[] = {"D", "N", "centers", "ranges",
"weights", "lower", "upper", "leftch", "rightch", "perm"};
const int BallTree::nfields = 10;

// Given the leaves, build the rest of the tree from the top down.
// Split the leaves along the most spread coordinate, build two balls
// out of those, and then build a ball around those two children.
void BallTree::buildBall(BallTree::index low, BallTree::index high, BallTree::index root)
{
  // special case for N=1 trees
  if(low == high) {
    lowest_leaf[root] = low;
    highest_leaf[root] = high;
    left_child[root] = low;
    // point right child to the same as left for calc stats, and then
    // point it to the correct NO_CHILD afterwards. kinda kludgey
    right_child[root] = high;
    calcStats(root);
    right_child[root] = NO_CHILD;
    return;
  }

  BallTree::index coord, split, left, right;
  coord = most_spread_coord(low, high); // find dimension of widest spread

  // split the current leaves into two groups, to build balls on them.
  // Chose the most spread coordinate to split them on, and make sure
  // there are the same number of points in each (+-1 for round off
  // error).
  split = (low + high) / 2;
  select(coord, split, low, high);
  // an alternative is to use partition, but that doesn't deal well
  // with repeated numbers and it doesn't split into balanced sets.
  // split = partition(coord, low, high);

  // if the left sub-tree is just one leaf, don't make a new non-leaf
  // node for it, just point left_idx directly to the leaf itself.
  if(split <= low) left = low;
  else left = next++;
  // same for the right
  if(split+1 >= high) right = high;
  else right = next++;

  lowest_leaf[root] = low;
  highest_leaf[root] = high;
  left_child[root] = left;
  right_child[root] = right;

  // build sub-trees if necessary
  if(left != low) buildBall(low, split, left);
  if(right != high) buildBall(split+1, high, right);

  calcStats(root);
}

// Find the dimension along which the leaves between low and high
// inclusive have the greatest variance
BallTree::index BallTree::most_spread_coord(BallTree::index low, BallTree::index high) const
{
  BallTree::index dimension, point, max_dim;
  double mean, variance, max_variance;
  max_variance = 0;
  max_dim = 0;
  assert(dims >0);
  // NOTE(review): a span of this function was destroyed during extraction --
  // the loop's termination condition and the mean/variance computation over
  // the leaves [low, high] are missing.  The surviving text is preserved
  // verbatim below and must be restored from the original source.
  for(dimension = 0; dimension max_variance) {
    max_variance = variance;
    max_dim = dimension;
  }
  }
  return max_dim;
}

// straight from CLR, the unrandomized partition algorithm for
// quicksort. Partitions the leaves from low to high inclusive around
// a random pivot in the given dimension. Does not affect non-leaf
// nodes, but does relabel the leaves from low to high.
BallTree::index BallTree::partition(unsigned int dimension, BallTree::index low, BallTree::index high)
{
  BallTree::index pivot;
  pivot = low; // not randomized, could set pivot to a random element
  while(low < high) {
    // move `high` down past leaves >= the pivot value and `low` up past
    // leaves below it, then exchange the out-of-place pair.
    while(centers[dims*high + dimension] >= centers[dims*pivot + dimension])
      high--;
    while(centers[dims*low + dimension] < centers[dims*pivot + dimension])
      low++;
    swap(low, high);
    pivot = high;
  }
  return high;
}

// Function to partition the data into two (equal-sized or near as possible)
// sets, one of which is uniformly greater than the other in the given
// dimension.
void BallTree::select(unsigned int dimension, BallTree::index position, BallTree::index low, BallTree::index high)
{
  BallTree::index m,r,i;
  // Quickselect-style loop: repeatedly partition [low, high] around the
  // element swapped into `low`, narrowing until the pivot index m lands
  // exactly on `position`.
  while (low < high) {
    r = (low + high)/2;
    swap(r,low);
    m = low;
    for (i=low+1; i<=high; i++) {
      if (centers[dimension+dims*i] < centers[dimension+dims*low]) {
        m++;
        swap(m,i);
      }
    }
    swap(low,m);
    if (m <= position) low=m+1;
    if (m >= position) high=m-1;
  }
}

// Swap the ith leaf with the jth leaf. Actually, only swap the
// weights, permutation, and centers, so only for swapping
// leaves. Will not swap ranges correctly and will not swap children
// correctly.
void BallTree::swap(BallTree::index i, BallTree::index j)
{
  BallTree::index k;
  double tmp;
  if (i==j) return;

  // swap weights
  tmp = weights[i]; weights[i] = weights[j]; weights[j] = tmp;
  // swap perm
  k = permutation[i]; permutation[i] = permutation[j]; permutation[j] = k;
  // swap centers
  i *= dims;
  j *= dims;
  // NOTE(review): extraction destroyed a span here -- the body of this
  // per-dimension copy loop, the end of swap(), and the signature/locals
  // (root, leftI, rightI, d) of BallTree::calcStats() are missing.  The
  // surviving fragments are preserved verbatim and must be restored from
  // the original source.
  for(k=0; k 0);
  // figure out the center and ranges of this ball based on it's children
  double max, min;
  for(d=0; d center(rightI)[d] + range(rightI)[d])
    max = center(leftI)[d] + range(leftI)[d];
  else max = center(rightI)[d] + range(rightI)[d];
  if (center(leftI)[d] - range(leftI)[d] < center(rightI)[d] - range(rightI)[d])
    min = center(leftI)[d] - range(leftI)[d];
  else min = center(rightI)[d] - range(rightI)[d];
  centers[root*dims+d] = (max+min) / 2;
  ranges[root*dims+d] = (max-min) / 2;
  }

  // if the left ball is the same as the right ball (should only
  // happen when calling the function directly with the same argument
  // twice), don't count the weight twice
  if(leftI != rightI)
    weights[root] = weights[leftI] + weights[rightI];
  else
    weights[root] = weights[leftI];
}

// Public method to build the tree, just calls the private method with
// the proper starting arguments.
void BallTree::buildTree() { BallTree::index i,j; for (j=0, i=num_points; j0); for(int i=0; i0); index i; for (i=leafFirst(root());i<=leafLast(root());i++) for (unsigned int k=0;k0); for(index i=num_points, j=0; i0); for(index i=0; i= 0) dist += tmp*tmp; } return dist; } double BallTree::maxDist(index myBall, const double* point) const { double dist = 0, tmp; assert(dims >0); for(index i=0; i myMap; // nns is a K x N matrix // dists is a 1 x N vector // points is a D x N matrix void BallTree::kNearestNeighbors(index *nns, double *dists, const double *points, int N, int k) const { myMap m; int leavesDone = 0; index lastBall; assert(dims >0); assert(N>0); for(index target = 0; target < N*dims; target += dims, ++leavesDone) { int nnsSoFar = 0; double leastDist = maxDist(root(), points+target); m.insert(myMap::value_type(minDist(root(), points+target), root())); // examining points in order of min dist means that when you see a // point, it is the closest point left while(! m.empty() && nnsSoFar < k) { index current = (*m.begin()).second; m.erase(m.begin()); if(isLeaf(current)) { // since the nodes are sorted by minDist, a leaf at the front of // the pq must be the next nearest neighbor nns[leavesDone*k + nnsSoFar++] = current; lastBall = current; } else { // not a leaf // push both children m.insert(myMap::value_type(minDist(left_child[current], points+target), left_child[current])); m.insert(myMap::value_type(minDist(right_child[current], points+target), right_child[current])); } bool keepInnerPruning = true; myMap::iterator it = m.begin(), last; // get rid of ineligible balls and find the min distance while(it != m.end()) { current = (*it).second; double max = maxDist(current, points+target); if(max < leastDist && Npts(current) >= k - nnsSoFar) leastDist = max; // if((*it).first > leastDist) { // // we see the points in order of minDist, so once we see one // // that's too big, the rest will also be too big // m.erase(it, m.end()); // break; // } myMap::iterator 
last = it++; if(keepInnerPruning) { if(it != m.end() && max < (*it).first && Npts(current) < k - nnsSoFar) { // if the closest ball doesn't have too many points and all of // them are nearer than any other ball, include them all // note "<" not "<=" so that *dist will be right lastBall = current; for(index i=leafFirst(current); i <= leafLast(current); i++) nns[leavesDone*k + nnsSoFar++] = i; m.erase(last); } else { keepInnerPruning = false; } } } // end pruning } // end single nearest neighbor // clear out the remaining points m.clear(); dists[leavesDone] = sqrt(maxDist(lastBall, points+target)); index i; for(i=leavesDone*k; i < leavesDone*k+nnsSoFar; i++) nns[i] = getIndexOf(nns[i]); for(i=leavesDone*k+nnsSoFar; i<(leavesDone+1)*k; i++) nns[i] = NO_CHILD; } // end all nearest neighbors } /////////////////////////////// matlab functions //////////////////////////// // Constructor that doesn't initialize members, so that they can be // set by the loadFromMatlab and createInMatlab functions. //BallTree::BallTree() : next(1) {} #ifdef MEX // Load the arrays already allocated in matlab from the given // structure. 
BallTree::BallTree(const mxArray* structure) { dims = (unsigned int) mxGetScalar(mxGetField(structure,0,"D")); // get the dimensions num_points = (BallTree::index) mxGetScalar(mxGetField(structure,0,"N")); // centers = (double*) mxGetPr(mxGetField(structure,0,"centers")); ranges = (double*) mxGetPr(mxGetField(structure,0,"ranges")); weights = (double*) mxGetPr(mxGetField(structure,0,"weights")); lowest_leaf = (BallTree::index*) mxGetData(mxGetField(structure,0,"lower")); highest_leaf= (BallTree::index*) mxGetData(mxGetField(structure,0,"upper")); left_child = (BallTree::index*) mxGetData(mxGetField(structure,0,"leftch")); right_child = (BallTree::index*) mxGetData(mxGetField(structure,0,"rightch")); permutation = (BallTree::index*) mxGetData(mxGetField(structure,0,"perm")); next = 1; // unimportant } // Create new matlab arrays and put them in the given structure. mxArray* BallTree::createInMatlab(const mxArray* _pointsMatrix, const mxArray* _weightsMatrix) { mxArray* structure; structure = matlabMakeStruct(_pointsMatrix,_weightsMatrix); BallTree bt(structure); if (bt.Npts() > 0) bt.buildTree(); return structure; } // Create new matlab arrays and put them in the given structure. 
mxArray* BallTree::matlabMakeStruct(const mxArray* _pointsMatrix, const mxArray* _weightsMatrix) { mxArray* structure; BallTree::index i, j; double *_points, *_weights; // get fields from input arguments unsigned int Nd = mxGetM(_pointsMatrix); BallTree::index Np = mxGetN(_pointsMatrix); _points = (double*)mxGetData(_pointsMatrix); _weights = (double*)mxGetData(_weightsMatrix); // create structure, populate it, and get handles to the arrays structure = mxCreateStructMatrix(1, 1, nfields, FIELD_NAMES); mxSetField(structure, 0, "D", mxCreateDoubleScalar((double) Nd)); mxSetField(structure, 0, "N", mxCreateDoubleScalar((double) Np)); mxSetField(structure, 0, "centers", mxCreateDoubleMatrix(Nd, 2*Np, mxREAL)); mxSetField(structure, 0, "ranges", mxCreateDoubleMatrix(Nd, 2*Np, mxREAL)); mxSetField(structure, 0, "weights", mxCreateDoubleMatrix(1, 2*Np, mxREAL)); mxSetField(structure, 0, "lower", mxCreateNumericMatrix(1, 2*Np, mxUINT32_CLASS, mxREAL)); mxSetField(structure, 0, "upper", mxCreateNumericMatrix(1, 2*Np, mxUINT32_CLASS, mxREAL)); mxSetField(structure, 0, "leftch", mxCreateNumericMatrix(1, 2*Np, mxUINT32_CLASS, mxREAL)); mxSetField(structure, 0, "rightch", mxCreateNumericMatrix(1, 2*Np, mxUINT32_CLASS, mxREAL)); mxSetField(structure, 0, "perm", mxCreateNumericMatrix(1, 2*Np, mxUINT32_CLASS, mxREAL)); // initialize arrays double* centers = (double *) mxGetData(mxGetField(structure, 0, "centers")); double* weights = (double *) mxGetData(mxGetField(structure, 0, "weights")); for (j=0,i=Nd*Np; j= 1); //centers = (double*) mxGetPr(mxGetField(structure,0,"centers")); itpp::vec temp = structure.centers.get_row(0); centers = new double[dims*2*num_points]; for (int i=0; i< num_points; i++) centers[i+dims*num_points] = temp[i]; //ranges = (double*) mxGetPr(mxGetField(structure,0,"ranges")); ranges = new double[2 * dims*num_points]; //weights = (double*) mxGetPr(mxGetField(structure,0,"weights")); //weights = vec2vec(&structure.weights); weights = new 
double[2*dims*num_points]; for (int i=0; i< num_points; i++) weights[i+dims*num_points] = structure.weights(i); lowest_leaf = new unsigned int[2*num_points]; //(BallTree::index*) mxGetData(mxGetField(structure,0,"lower")); highest_leaf= new unsigned int[2*num_points];//(BallTree::index*) mxGetData(mxGetField(structure,0,"upper")); left_child = new unsigned int[2*num_points];//(BallTree::index*) mxGetData(mxGetField(structure,0,"leftch")); right_child = new unsigned int [2*num_points];//BallTree::index*) mxGetData(mxGetField(structure,0,"rightch")); permutation = new unsigned int [2*num_points];//BallTree::index*) mxGetData(mxGetField(structure,0,"perm")); next = 1; // unimportant //buildTree(); } void BallTree::clean(){ if (lowest_leaf != NULL) delete[] lowest_leaf; if (highest_leaf != NULL) delete[] highest_leaf; if (left_child != NULL) delete[] left_child; if (right_child != NULL) delete[] right_child; if (permutation != NULL) delete[] permutation; if (centers != NULL) delete[] centers; if (weights!= NULL) delete[] weights; dims = num_points = 0; } BallTree::~BallTree(){ /* if (lowest_leaf != NULL) delete[] lowest_leaf; if (highest_leaf != NULL) delete[] highest_leaf; if (left_child != NULL) delete[] left_child; if (right_child != NULL) delete[] right_child; if (permutation != NULL) delete[] permutation; if (centers != NULL) delete[] centers; if (weights!= NULL) delete[] weights; */ // printf("was in destructor\n"); } #endif ================================================ FILE: toolkits/graphical_models/deprecated/kernelbp/old/cpp/BallTreeDensity.h ================================================ /* Copyright (c) 2003 Alexander Ihler * Original code from: http://www.ics.uci.edu/~ihler/code/index.html * * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */

//////////////////////////////////////////////////////////////////////////////////////
// BallTreeDensity.h -- class definition for a tree-based kernel density estimate
//
// A few functions are defined only for MEX calls (construction & load from matlab)
// Most others can be used more generally.
//
//////////////////////////////////////////////////////////////////////////////////////
//
// Written by Alex Ihler and Mike Mandel
// Copyright (C) 2003 Alexander Ihler
//
//////////////////////////////////////////////////////////////////////////////////////

#ifndef __BALL_TREE_DENSITY_H
#define __BALL_TREE_DENSITY_H

#include "BallTree.h"
// NOTE(review): the names of the two standard headers below were lost
// during extraction.
#include
#include
#include "../kde.h"

#ifndef NULL
#define NULL 0
#endif

// Kernel density estimate stored over a BallTree: each node additionally
// carries a weighted mean and (possibly non-uniform) bandwidth bounds.
class BallTreeDensity : public BallTree {
public:
  enum KernelType { Gaussian, Epanetchnikov, Laplacian };
  KernelType getType(void) const { return type; };
  // Which quantity a gradient computation differentiates with respect to.
  enum Gradient { WRTMean, WRTVariance, WRTWeight };

  /////////////////////////////
  // Constructors
  /////////////////////////////
  //BallTreeDensity( unsigned int d, index N, double* points_,
  // double* weights_, double* bandwidths_);

  // Default constructor: empty tree, no bandwidth arrays.
  BallTreeDensity() : BallTree() { bandwidth = bandwidthMax = bandwidthMin = NULL; }

#ifdef MEX
  // for loading ball trees from matlab
  BallTreeDensity(const mxArray* structure);
  static mxArray* createInMatlab(const mxArray* pts, const mxArray* wts, const mxArray* bw, BallTreeDensity::KernelType _type=Gaussian);
#else
  BallTreeDensity(const kde & pkde);
  BallTreeDensity* createInMatlab(const itpp::mat* pts, const itpp::vec* wts, const itpp::vec* bw, BallTreeDensity::KernelType _type=Gaussian);
#endif

  /////////////////////////////
  // Accessor Functions
  /////////////////////////////
  const double* mean(BallTree::index i) const { return means+i*dims; }
  const double* variance(BallTree::index i) const { return bandwidth+i*dims; } // !!! only works for Gaussian
  const double* bw(BallTree::index i) const { return bandwidth +i*dims; }
  // In the uniform-bandwidth case multibandwidth==0, so these collapse to
  // the single shared bandwidth entry.
  const double* bwMax(BallTree::index i) const { return bandwidthMax+i*dims*multibandwidth; }
  const double* bwMin(BallTree::index i) const { return bandwidthMin+i*dims*multibandwidth; }
  bool bwUniform(void) const { return multibandwidth==0; };

  // -- Others inherited from BallTree --

  ///////////////////////////////
  //
  // Evaluation of the density at a set of points:
  // pre-constructed balltree version
  // array of doubles version
  // leave-one-out cross-validation version
  //
  void evaluate(const BallTree& atPoints, double* values, double maxErr=0) const;
  // void evaluate(index Npts, const double* atPoints, double* values, double maxErr=0) const;
  void evaluate(double* p, double maxErr) const { evaluate(*this,p,maxErr); }

  void llGrad(const BallTree& locations, double* gradDens, double* gradAt, double tolEval, double tolGrad, Gradient) const;
  // void llGrad(index Npts, const double* atPoints, double* gradDens, double* gradAt, double tolEval, double tolGrad) const;

  bool updateBW(const double*, index);

  /////////////////////////////
  // Private object functions
  /////////////////////////////
protected:
#ifdef MEX
  static mxArray* matlabMakeStruct(const mxArray* pts, const mxArray* wts, const mxArray* bw, BallTreeDensity::KernelType type);
#endif
  virtual void swap(BallTree::index, BallTree::index);// leaf-swapping function
  virtual void calcStats(BallTree::index root); // recursion for computing BW ranges

  KernelType type;
  unsigned int multibandwidth; // flag: is bandwidth uniform?

  double *means;     // Weighted mean of points from this level down
  double *bandwidth; // Variance or other multiscale bandwidth
  double *bandwidthMax,*bandwidthMin; // Bounds on BW in non-uniform case

  // Internal evaluate functions:
  // Recursive tree evaluation
  const static index DirectSize = 100; // if N*M is less than this, just compute.
  void evaluate(BallTree::index myRoot, const BallTree& atTree, BallTree::index aRoot, double maxErr) const;
  void evalDirect(BallTree::index myRoot, const BallTree& atTree, BallTree::index aRoot) const;
  void llGradDirect(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot, Gradient) const;
  void llGradRecurse(BallTree::index dRoot,const BallTree& atTree, BallTree::index aRoot, double tolGrad, Gradient) const;
  void llGradWDirect(index dRoot, const BallTree& atTree, index aRoot) const;
  void llGradWRecurse(index dRoot,const BallTree& atTree, index aRoot, double tolGrad) const;

  // Bounds on kernel values between points in this subtree & another
  double maxDistKer(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot) const {
    switch(getType()) {
    case Gaussian: return maxDistGauss(dRoot,atTree,aRoot);
    case Laplacian: return maxDistLaplace(dRoot,atTree,aRoot);
    case Epanetchnikov: return maxDistEpanetch(dRoot,atTree,aRoot);
    default: assert(false);
    }
    return -1;
  };
  double minDistKer(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot) const {
    switch(getType()) {
    case Gaussian: return minDistGauss(dRoot,atTree,aRoot);
    case Laplacian: return minDistLaplace(dRoot,atTree,aRoot);
    case Epanetchnikov: return minDistEpanetch(dRoot,atTree,aRoot);
    default: assert(false);
    }
    return -1;
  };

  // Types of kernels supported
  double maxDistLaplace(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot) const;
  double minDistLaplace(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot) const;
  double maxDistGauss(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot) const;
  double minDistGauss(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot) const;
  double maxDistEpanetch(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot, int dim=-1) const;
  double minDistEpanetch(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot, int dim=-1) const;

  void dKdX_p(BallTree::index dRoot,const BallTree& atTree, BallTree::index aRoot, bool bothLeaves, Gradient) const;
};

#endif


================================================
FILE: toolkits/graphical_models/deprecated/kernelbp/old/cpp/BallTreeDensityClass.cc
================================================
/* Copyright (c) 2003 Alexander Ihler
 * Original code from: http://www.ics.uci.edu/~ihler/code/index.html
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
*/

//////////////////////////////////////////////////////////////////////////////////////
// KD-tree code extended for use in kernel density estimation
//////////////////////////////////////////////////////////////////////////////////////
//
// Written by Alex Ihler and Mike Mandel
// Copyright (C) 2003 Alexander Ihler
//
//////////////////////////////////////////////////////////////////////////////////////
//#define MEX
//#define NEWVERSION

// NOTE(review): the targets of the two #include directives below were lost
// when this file was extracted (angle-bracket spans were stripped) -- TODO
// restore the original standard-library header names from upstream.
#include
#include
#ifdef MEX
#include "mex.h"
#endif
#include "BallTreeDensity.h"

// File-scope scratch arrays shared with the kernel-bound code pulled in by
// kernels.h below.  pMin/pMax hold per-node lower/upper bounds on the density
// values; pAdd/pErr hold the NEWVERSION local-expansion constants and their
// error terms.  Because this state is global, concurrent evaluations would
// clobber each other -- NOTE(review): confirm single-threaded use by callers.
double *pMin, *pMax;     // need to declare these here, for kernel
double **pAdd, *pErr;
double *min, *max;       // derivative functions in kernel.h

#include "kernels.h"     // min&max kernel bounds for various kernels

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
//
// EVALUATION
//
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// Push the pending local-expansion constant stored at node aRoot (pAdd[0])
// down onto aRoot's children, then clear it.  Used only by the NEWVERSION
// code paths.
void pushDownLocal(const BallTree& atTree, const BallTree::index aRoot) {
  BallTree::index close;
  if (!atTree.isLeaf(aRoot)) {
    close = atTree.left(aRoot);
    if (close != BallTree::NO_CHILD) pAdd[0][close] += pAdd[0][aRoot];
    close = atTree.right(aRoot);
    if (close != BallTree::NO_CHILD) pAdd[0][close] += pAdd[0][aRoot];
    pAdd[0][aRoot] = 0;
  }
}

// NOTE(review): extraction damage below.  Everything that originally sat
// between "j" and "pMin[r]" was stripped out: the loop condition and body of
// pushDownAll, and the signature/head of the recurseMinMax helper.  The
// trailing statements appear to be the tail of recurseMinMax, which folds the
// children's pMin/pMax bounds into node aRoot -- restore from upstream.
void pushDownAll(const BallTree& locations) {
  BallTree::index j;
  for (j=locations.root(); j pMin[r]) pMin[aRoot] = pMin[r];
  if (pMax[aRoot] < pMax[r]) pMax[aRoot] = pMax[r];
}

/////////////////////////////////////////////////////////////////////
// Recursively evaluate the density implied by the samples of the
// subtree (rooted at dRoot) of *this at the locations given by the
// subtree (rooted at aRoot) of atTree, to within the error
// percentage "maxErr".  Lower/upper bounds on the density at each
// location are accumulated into the global pMin/pMax arrays.
/////////////////////////////////////////////////////////////////////
void BallTreeDensity::evaluate(BallTree::index dRoot, const BallTree& atTree,
                               BallTree::index aRoot, double maxErr) const {
  BallTree::index k, close, far;
  double Kmin,Kmax,add,total;   // NOTE(review): "add" is declared but never used here

  // find the minimum and maximum effect of these two balls on each other
  Kmax = minDistKer(dRoot, atTree, aRoot);
  Kmin = maxDistKer(dRoot, atTree, aRoot);

  total = pMin[ aRoot ];                    // take pmin of data below this level
#ifdef NEWVERSION
  total += pAdd[0][aRoot] - pErr[aRoot];    // add lower bound from local expansion
#endif
  total += weight(dRoot)*Kmin;              // also add minimum for this block

  // if the weighted contribution of this multiply is below the
  // threshold, no need to recurse; just treat as constant
  ////
  //if ( Kmax - Kmin <= maxErr) {               // APPROXIMATE: ABSOLUTE
  if ( Kmax - Kmin <= maxErr * total) {         // APPROXIMATE: PERCENT
    Kmin *= weight(dRoot);  Kmax *= weight(dRoot);
    if (this == &atTree && aRoot==dRoot) {      // LEAVE-ONE-OUT (and same subtree)
      for (k=atTree.leafFirst(aRoot); k<=atTree.leafLast(aRoot); k++){
        pMin[k] += Kmin * (1 - weight(k)/weight(dRoot));  // leave our weight out of it
        pMax[k] += Kmax * (1 - weight(k)/weight(dRoot));  //
      }
      recurseMinMax(atTree,aRoot);
    } else {                                    // NO L-O-O => just add away
#ifdef NEWVERSION
      pAdd[0][aRoot] += (Kmin + Kmax)/2;  pErr[aRoot] = (Kmax-Kmin)/2;
#else
      // !!! Should *not* do this -- instead add to local expansion (constant term)
      for (k=atTree.leafFirst(aRoot); k<=atTree.leafLast(aRoot); k++) {
        pMin[k] += Kmin;  pMax[k] += Kmax;
      }
#endif
      if (!atTree.isLeaf(aRoot)) {
        pMin[aRoot] += Kmin;  pMax[aRoot] += Kmax;
      }
    }
  } else if (Npts(dRoot)*atTree.Npts(aRoot)<=DirectSize){   // DIRECT EVALUATION
    // small block: fall back to exhaustive leaf-by-leaf computation
    evalDirect(dRoot,atTree,aRoot);
  } else if (0) {                                           // FAST GAUSS APPROX
    // (disabled sketch of a fast-Gauss-transform branch)
    // if FGTTerms > 0 : have computed Hermite expansions of densTree (sigma uniform)
    //   if FGTError(dRoot->Nterms,minDistDtoA,sigma) < maxError * total
    //   (if maxError, sigma, Nterms known, compute R0 & check >= minDist)
    //   translate dRoot's hermite expansion to a local expansion around aRoot
    //   Need to iterate over aRoot's leaves & evaluate? (N log N)
    //   Update pMin structure...
  } else {                                                  // RECURSE ON SUBTREES
#ifdef NEWVERSION
    pushDownLocal(atTree,aRoot);   // push any local expansion down first
#endif
    // Find the subtree closest to the other tree's left child and do
    // that first so that the values are higher and there is a better
    // chance of being able to skip a recursion.
    close = atTree.closer( atTree.left(aRoot), atTree.right(aRoot), *this, left(dRoot));
    if (left(dRoot) != NO_CHILD && close != NO_CHILD)
      evaluate(left(dRoot), atTree, close, maxErr);
    far = (close == atTree.left(aRoot)) ? atTree.right(aRoot) : atTree.left(aRoot);
    if (left(dRoot) != NO_CHILD && far != NO_CHILD)
      evaluate(left(dRoot), atTree, far, maxErr);

    // Now the same thing for the density's right child
    close = atTree.closer( atTree.left(aRoot), atTree.right(aRoot), *this, right(dRoot));
    if (right(dRoot) != NO_CHILD && close != NO_CHILD)
      evaluate(right(dRoot), atTree, close, maxErr);
    far = (close == atTree.left(aRoot)) ? atTree.right(aRoot) : atTree.left(aRoot);
    if (right(dRoot) != NO_CHILD && far != NO_CHILD)
      evaluate(right(dRoot), atTree, far, maxErr);

    // Propagate additions in children's minimum value to this node
    if (!atTree.isLeaf(aRoot)) {
      pMin[aRoot] = pMin[ atTree.left(aRoot) ];
      pMax[aRoot] = pMax[ atTree.left(aRoot) ];
      if (atTree.right(aRoot) != NO_CHILD) {
        if (pMin[aRoot] > pMin[ atTree.right(aRoot) ]) pMin[aRoot] = pMin[ atTree.right(aRoot) ];
        if (pMax[aRoot] < pMax[ atTree.right(aRoot) ]) pMax[aRoot] = pMax[ atTree.right(aRoot) ];
      }
    }
  }
}

///////////////////////////////////////////
// Maybe we just want to evaluate this stuff directly.
/////////////////////////////////////////// void BallTreeDensity::evalDirect(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot) const { BallTree::index i,j; bool firstFlag = true; double minVal=2e22, maxVal=0; for (j=atTree.leafFirst(aRoot); j<=atTree.leafLast(aRoot); j++) { for (i=leafFirst(dRoot); i<=leafLast(dRoot); i++) { if (this != &atTree || i!=j) { // Check leave-one-out condition; double d = weight(i) * maxDistKer(i,atTree,j); // Do direct N^2 kernel evaluation //if (this == &atTree) d /= 1-weight(j); // leave-one-out => renormalize weights pMin[j] += d; pMax[j] += d; } } #ifdef NEWVERSION } recurseMinMax(atTree,aRoot); // pass up min (& max) value for pruning #else if (pMin[j] < minVal) minVal = pMin[j]; // determine min & max value in this block if (pMax[j] > maxVal) maxVal = pMax[j]; } pMin[aRoot] = minVal; pMax[aRoot] = maxVal; #endif } ///////////////////////////////////////////////////////////////////// // Dual Tree evaluation: estimate the values at this ball tree's // points given the other tree as the samples from a distribution. 
///////////////////////////////////////////////////////////////////// void BallTreeDensity::evaluate(const BallTree& locations, double* p, double maxErr) const { BallTree::index j; assert(Ndim() == locations.Ndim()); assert(p != NULL); pMin = new double[2*locations.Npts()]; pMax = new double[2*locations.Npts()]; for (j=0;j<2*locations.Npts();j++) pMin[j] = pMax[j] = 0; #ifdef NEWVERSION pAdd = new double*[1]; pAdd[0] = new double[2*locations.Npts()]; pErr = new double[2*locations.Npts()]; for (j=0;j<2*locations.Npts();j++) pAdd[0][j] = pErr[j] = 0; #endif evaluate(root(), locations, locations.root(), 2*maxErr); // Compute & account for the kernel f'ns normalization constant double norm = 1; switch(getType()) { case Gaussian: norm = pow(2*PI, ((double)Ndim())/2 ); if (bwUniform()) for (unsigned int i=0;i d(log L)/dxj[k] = - sum_i wi 1/p(yi) wj K'(xj-yi) // d(log L)/dyi[k] = wi 1/p(yi) sum_j wj K'(xj-yi) (same K') // //////////////////////////////////////////////////////////////////////////////////// void BallTreeDensity::llGrad(const BallTree& locations, double* _gradD, double* _gradA, double tolEval, double tolGrad, Gradient gradWRT) const { BallTree::index j, k; gradD = _gradD; gradA = _gradA; min = new double[locations.Ndim()]; max = new double[locations.Ndim()]; pMin = new double[2*locations.Npts()]; pMax = new double[2*locations.Npts()]; for (j=0;j<2*locations.Npts();j++) pMin[j] = pMax[j] = 0; #ifdef NEWVERSION pAdd = new double*[1]; pAdd[0] = new double[2*locations.Npts()]; pErr = new double[2*locations.Npts()]; for (j=0;j<2*locations.Npts();j++) pAdd[0][j] = pErr[j] = 0; #endif evaluate(root(), locations, locations.root(), 2*tolEval); #ifdef NEWVERSION pushDownAll(locations); #endif if (this == &locations) { // fix leave-one-out normalization for (j=leafFirst(root()); j<=leafLast(root()); j++) pMax[j] /= (1-weight(j)); pMin[j] /= (1-weight(j)); } if(gradWRT == WRTWeight) llGradWRecurse(root(),locations,locations.root(), tolGrad*tolGrad); else 
llGradRecurse(root(),locations,locations.root(), tolGrad*tolGrad, gradWRT); if (this == &locations) { // fix leave-one-out normalization for (j=leafFirst(root()); j<=leafLast(root()); j++) { index Nj = Ndim() * getIndexOf(j); for (k=0;k bandwidthMin = bandwidthMax + 2*num_points*dims; // track min/max vals } else { // all the same => min = max multibandwidth = 0; // = any leaf node bandwidthMax = bandwidthMin = bandwidth + num_points*dims; } } // Create new matlab arrays and put them in the given structure mxArray* BallTreeDensity::createInMatlab(const mxArray* _pointsMatrix, const mxArray* _weightsMatrix, const mxArray* _bwMatrix,BallTreeDensity::KernelType _type) { mxArray* structure = matlabMakeStruct(_pointsMatrix, _weightsMatrix,_bwMatrix,_type); BallTreeDensity dens(structure); if (dens.Npts() > 0) dens.buildTree(); return structure; } // Create new matlab arrays and put them in the given structure. mxArray* BallTreeDensity::matlabMakeStruct(const mxArray* _pointsMatrix, const mxArray* _weightsMatrix, const mxArray* _bwMatrix,BallTreeDensity::KernelType _type) { BallTree::index i,j; mxArray* structure = BallTree::matlabMakeStruct(_pointsMatrix, _weightsMatrix); unsigned int Nd = (unsigned int) mxGetScalar(mxGetField(structure,0,"D")); index Np = (BallTree::index) mxGetScalar(mxGetField(structure,0,"N")); mxAddField(structure, "means"); mxSetField(structure, 0, "means", mxCreateDoubleMatrix(Nd, 2*Np, mxREAL)); mxAddField(structure, "bandwidth"); if (mxGetN(_bwMatrix) == 1) mxSetField(structure, 0, "bandwidth", mxCreateDoubleMatrix(Nd, 2*Np, mxREAL)); else mxSetField(structure, 0, "bandwidth", mxCreateDoubleMatrix(Nd, 6*Np, mxREAL)); mxAddField(structure, "type"); mxSetField(structure, 0, "type", mxCreateDoubleScalar((double)_type)); // initialize arrays double* means = (double *) mxGetData(mxGetField(structure, 0, "means")); double* points = (double *) mxGetData(mxGetField(structure, 0, "centers")); for (j=0,i=Nd*Np; j 1){ itpp::Sort mysort; 
mysort.sort(0,tmp.size()-1,tmp); } //if (mxGetN(mxGetField(structure,0,"bandwidth")) == 6*num_points) { if (structure.bw.size() > 1 && tmp(0) < tmp(tmp.size()-1)){ assert(false); multibandwidth = 1; bandwidthMax = bandwidth + 2*num_points*dims; // not all the same => bandwidthMin = bandwidthMax + 2*num_points*dims; // track min/max vals } else { // all the same => min = max bandwidth = new double[2*dims*num_points]; for (int i=0; i bandwidthMax[NiR+k]) ? bandwidthMax[NiL+k] : bandwidthMax[NiR+k]; bandwidthMin[Ni+k] = (bandwidthMin[NiL+k] < bandwidthMin[NiR+k]) ? bandwidthMin[NiL+k] : bandwidthMin[NiR+k]; } } switch(type) { case Gaussian: for(unsigned int k=0; k < dims; k++) { means[Ni+k] = wtL * means[NiL+k] + wtR * means[NiR+k]; bandwidth[Ni+k] = wtL* (bandwidth[NiL+k] + means[NiL+k]*means[NiL+k]) + wtR* (bandwidth[NiR+k] + means[NiR+k]*means[NiR+k]) - means[Ni+k]*means[Ni+k]; }; break; case Laplacian: for(unsigned int k=0; k < dims; k++) { means[Ni+k] = wtL * means[NiL+k] + wtR * means[NiR+k]; bandwidth[Ni+k] = wtL* (2*bandwidth[NiL+k]*bandwidth[NiL+k] + means[NiL+k]*means[NiL+k]) + wtR* (2*bandwidth[NiR+k]*bandwidth[NiR+k] + means[NiR+k]*means[NiR+k]) - means[Ni+k]*means[Ni+k]; // compute in terms of variance bandwidth[Ni+k] = sqrt(.5*bandwidth[Ni+k]); // then convert back to normal BW rep. }; break; case Epanetchnikov: for(unsigned int k=0; k < dims; k++) { means[Ni+k] = wtL * means[NiL+k] + wtR * means[NiR+k]; bandwidth[Ni+k] = wtL* (.2*bandwidth[NiL+k]*bandwidth[NiL+k] + means[NiL+k]*means[NiL+k]) + wtR* (.2*bandwidth[NiR+k]*bandwidth[NiR+k] + means[NiR+k]*means[NiR+k]) - means[Ni+k]*means[Ni+k]; // compute in terms of variance bandwidth[Ni+k] = sqrt(5*bandwidth[Ni+k]); // then convert back to normal BW rep. }; break; } } // Swap the ith leaf with the jth leaf. void BallTreeDensity::swap(BallTree::index i, BallTree::index j) { if (i==j) return; BallTree::swap(i,j); i *= dims; j *= dims; for(unsigned int k=0; k 0) ? 
tmp : 0; if (bwUniform()) result -= (tmp*tmp)/bw[k]; else result -= (tmp*tmp)/bw[k] + log(bwMin(dRoot)[k]); } result = exp(result/2); return result; } double BallTreeDensity::maxDistGauss(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot) const { unsigned int k; double tmp,result=0; const double *atCenter, *densCenter, *bw; atCenter = atTree.center(aRoot); densCenter = center(dRoot); bw = bwMin(dRoot); for (k=0; k 0) ? tmp : 0; if (bwUniform()) result -= tmp/bw[k]; else result -= tmp/bw[k] + log(bwMin(dRoot)[k]); } result = exp(result); return result; } double BallTreeDensity::maxDistLaplace(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot) const { unsigned int k; double tmp,result=0; const double *atCenter, *densCenter, *bw; atCenter = atTree.center(aRoot); densCenter = center(dRoot); bw = bwMin(dRoot); for (k=0; k 0) ? tmp : 0; tmp = (tmp > bw[k]) ? bw[k] : tmp; if (k==dim) { if (tmp==bw[k]) result=0; continue;} if (bwUniform()) result *= 1-(tmp/bw[k])*(tmp/bw[k]); else result *= (1-(tmp/bw[k])*(tmp/bw[k]))/bwMin(dRoot)[k]; } return result; } double BallTreeDensity::maxDistEpanetch(BallTree::index dRoot, const BallTree& atTree, BallTree::index aRoot, int dim) const { unsigned int k; double tmp,result=1; const double *atCenter, *densCenter, *bw; atCenter = atTree.center(aRoot); densCenter = center(dRoot); bw = bwMin(dRoot); for (k=0; k bw[k]) ? 
bw[k] : tmp; if (k==dim) { if (tmp==bw[k]) result=0; continue;} if (bwUniform()) result *= 1-(tmp/bw[k])*(tmp/bw[k]); else result *= (1-(tmp/bw[k])*(tmp/bw[k]))/bwMax(dRoot)[k]; } return result; } ///////////////////////////////////////////////////////////////////// // Find upper and lower bounds on 1/p(yj) K'(xi-yj) // for any points yj in the aRoot-th ball of atTree // and xi in the dRoot-th ball of densTree // 3 Possible Kernels : Gaussian, Laplacian, Epanetchnikov // Takes account of possible non-uniform bandwidth values ///////////////////////////////////////////////////////////////////// void BallTreeDensity::dKdX_p(BallTree::index dRoot,const BallTree& atTree, BallTree::index aRoot, bool bothLeaves, Gradient gradType) const { // Compute a maximum value of K'(yi-xj) for any pair: xj in dRoot, yi in aRoot // // e.g. Gaussian: <----- K -------------------> <------- D -------> // K'(x) = exp(-sum( .5*(a[m]-d[m])^2/bw[m] ) ) * (a[k]-d[k])/bw[k] // // Crappy bound is [ min(Kmax*Dmin,Kmin*Dmin), max(Kmax*Dmax,Kmin*Dmin) ] // const double *atCenter, *densCenter; double Kmin,Kmax; atCenter = atTree.center(aRoot); densCenter = center(dRoot); // printf("%d:%d \n",dRoot,aRoot); if (getType()!=Epanetchnikov) { // for the exponential forms: if (!bothLeaves) { Kmin = maxDistKer(dRoot,atTree,aRoot); // if non-leaf node need both Kmax = minDistKer(dRoot,atTree,aRoot); // values; } else // leaf nodes, we know they're equal Kmax = Kmin = maxDistKer(dRoot,atTree,aRoot);// so don't double-compute } for(unsigned int k=0;k leave out k^th dim Kmax = 2*minDistEpanetch(dRoot,atTree,aRoot,k); // when calculating if (!bwUniform()) { Kmax /= bwMin(dRoot)[k]; Kmin /= bwMax(dRoot)[k]; } } if (getType() == Laplacian) { // non-quadratic form if (Dmin < 0) Dmin = -1; if (Dmax < 0) Dmax = -1; // => sign(x-y) instead if (Dmin > 0) Dmin = +1; if (Dmax > 0) Dmax = +1; // of (x-y) } double bwmax = bwMax(dRoot)[k], bwmin = bwMin(dRoot)[k]; if (getType() == Epanetchnikov) { bwmax *= bwmax; bwmin *= 
bwmin; } if (gradType == WRTMean) { if (Dmin < 0) max[k] = -Kmax*Dmin/bwmin/pMin[aRoot]; else max[k] = -Kmin*Dmin/bwmax/pMax[aRoot]; if (Dmax < 0) min[k] = -Kmin*Dmax/bwmax/pMax[aRoot]; else min[k] = -Kmax*Dmax/bwmin/pMin[aRoot]; } else if(gradType == WRTWeight) { max[k] = -Kmax / pMin[aRoot]; min[k] = -Kmin / pMax[aRoot]; break; // only need to do for the first dimension } else if(gradType == WRTVariance) { max[k] = -Kmax / pMin[aRoot] * (0.5 / bwmin) * (Dmax * Dmax / bwmin - 1); min[k] = -Kmin / pMax[aRoot] * (0.5 / bwmax) * (Dmin * Dmin / bwmax - 1); } else { max[k] = min[k] = 0; } // printf(" %d -- %f %f -> %f / %f\n",k,Kmin,Kmax,min[k],max[k]); } } ================================================ FILE: toolkits/graphical_models/deprecated/kernelbp/old/denoise.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * For more about this software visit: * * http://www.graphlab.ml.cmu.edu * * Any changes to the code must include this original license notice in full. * Original code by Yucheng Low, CMU * Modified to Gaussian Mixture by DAnny Bickson, CMU * Based on Matlab code by Alex Ihler, UC Irvine * See the paper: Nonparametric Belief Propagation. E. Sudderth, A. Ihler, W. Freeman, and A. Willsky. CVPR, June 2003. 
*/ #include #include #include #include #include "kde.h" #include #include "image.hpp" #include "prodSampleEpsilon.hpp" #include #include #include #include #define PROPOSAL_STDEV 20 using namespace itpp; using namespace std; int NSAMP =12; //number of samples double EPSILON =1e-5; //epsilon (accuracy of product sampling) int MAX_ITERATIONS = 10; int iiter = 0; // STRUCTS (Edge and Vertex data) =============================================> struct edge_data: public graphlab::unsupported_serialize { kde msg; //the NBP message sent along this edge kde edge_pot; //edge potential of this edge int update_count; }; struct vertex_data: public graphlab::unsupported_serialize { kde obs; //ovservation kde bel; //belief int rounds; vertex_data(){ rounds = 0;} }; typedef graphlab::graph graph_type; typedef graphlab::types gl_types; /** * compare MAE (mean average error) of the true image vs. inferred image */ double image_compare_mae(image &trueimg, image &infered) { assert(trueimg.rows() == infered.rows()); assert(trueimg.cols() == infered.cols()); // get the set of colors in the trueimg std::set colors; for (size_t i = 0; i < trueimg.rows(); ++i) { for (size_t j = 0; j < trueimg.cols(); ++j) { colors.insert(size_t(trueimg.pixel(i,j))); } } // fill a rounding color map int colormap[256]; int previval = -256; std::set::iterator curi = colors.begin(); std::set::iterator nexti = curi; nexti++; int nextival = (nexti != colors.end())?*nexti:512; while (curi != colors.end()) { int low = (previval + (*curi)) / 2; if (low < 0) low = 0; int high = (nextival + (*curi)) / 2; if (high > 256) high = 256; for (int i = low; i < high; ++i) { colormap[i] = (*curi); } previval = (*curi); curi++; nexti++; nextival = (nexti != colors.end())?*nexti:512; } // compute absolute difference double err = 0; for (size_t i = 0; i < infered.rows(); ++i) { for (size_t j = 0; j < infered.cols(); ++j) { //err += (infered.pixel(i,j) - trueimg.pixel(i,j)) * (infered.pixel(i,j) - trueimg.pixel(i,j)) ; err += 
fabs(infered.pixel(i,j) - trueimg.pixel(i,j)); } } err /= (infered.rows() * infered.cols()); return err; } /** * compare RMSE (root mean square error) of true image vs. inferred image */ double image_compare_rmse(image &trueimg, image &infered) { assert(trueimg.rows() == infered.rows()); assert(trueimg.cols() == infered.cols()); // get the set of colors in the trueimg std::set colors; for (size_t i = 0; i < trueimg.rows(); ++i) { for (size_t j = 0; j < trueimg.cols(); ++j) { colors.insert(size_t(trueimg.pixel(i,j))); } } // fill a rounding color map int colormap[256]; int previval = -256; std::set::iterator curi = colors.begin(); std::set::iterator nexti = curi; nexti++; int nextival = (nexti != colors.end())?*nexti:512; while (curi != colors.end()) { int low = (previval + (*curi)) / 2; if (low < 0) low = 0; int high = (nextival + (*curi)) / 2; if (high > 256) high = 256; for (int i = low; i < high; ++i) { colormap[i] = (*curi); } previval = (*curi); curi++; nexti++; nextival = (nexti != colors.end())?*nexti:512; } // compute absolute difference double err = 0; for (size_t i = 0; i < infered.rows(); ++i) { for (size_t j = 0; j < infered.cols(); ++j) { err += (infered.pixel(i,j) - trueimg.pixel(i,j)) * (infered.pixel(i,j) - trueimg.pixel(i,j)) ; // err += fabs(infered.pixel(i,j) - trueimg.pixel(i,j)); } } err /= (infered.rows() * infered.cols()); return err; } /** * Non-parametric BP update function */ void nbp_update(gl_types::iscope& scope, gl_types::icallback& scheduler) { bool debug = true; vertex_data& v_data = scope.vertex_data(); graphlab::vertex_id_t vid = scope.vertex(); if (debug && vid%1000000 == 0){ std::cout<<"Entering node " << (int)vid << " obs: "; v_data.obs.matlab_print(); std::cout << std::endl; } v_data.rounds++; if ((int)vid == 0) iiter++; gl_types::edge_list in_edges = scope.in_edge_ids(); gl_types::edge_list out_edges = scope.out_edge_ids(); assert(in_edges.size() == out_edges.size()); // Sanity check //for each incoming message for (size_t j = 
0; j < in_edges.size(); ++j){ std::vector kdes; for(size_t i = 0; i < in_edges.size(); ++i) { graphlab::edge_id_t ineid = in_edges[i]; edge_data& in_edge = scope.edge_data(ineid); if (i != j){ in_edge.msg.verify(); //add the message into the mixture list kdes.push_back(in_edge.msg); } } kdes.push_back(v_data.obs); graphlab::edge_id_t outeid = out_edges[j]; edge_data& out_edge = scope.edge_data(outeid); kde marg = out_edge.edge_pot.marginal(0); //insert the marginal of this dimension as the first item of the mixture kdes.insert(kdes.begin(), marg);//important: has to be first! //compute the mixtures product prodSampleEpsilon producter; kde m = producter.prodSampleEpsilonRun(kdes.size(), NSAMP, EPSILON, kdes); m.verify(); kde mar2 = out_edge.edge_pot.marginal(1); mar2.verify(); imat firstrowind = m.indices(0,0,0,m.indices.cols()-1); //sample from marginal, using indices taken from product kde outmsg = mar2.sample(firstrowind,m.weights); outmsg.verify(); out_edge.msg = outmsg; } //compute belief by multiplying self potential with all incoming message if (v_data.rounds == MAX_ITERATIONS){ if (debug && vid%100000 == 0) printf("computing belief node %d\n", vid); std::vector kdes; for (size_t j = 0; j < in_edges.size(); ++j){ graphlab::edge_id_t ineid = in_edges[j]; edge_data& in_edge = scope.edge_data(ineid); in_edge.msg.verify(); //add all incoming message to mixture list kdes.push_back(in_edge.msg); } //add self potential kdes.push_back(v_data.obs); //compute the product prodSampleEpsilon prod; kde m = prod.prodSampleEpsilonRun(kdes.size(), NSAMP, EPSILON, kdes); m.verify(); //store the result v_data.bel = m; if (debug && vid == 0){ printf("belief node %d is\n", vid); m.matlab_print(); printf("\n"); } } } // end of nbp_update void construct_graph(image& img, kde & edge_pot, gl_types::graph& graph) { for(size_t i = 0; i < img.rows(); ++i) { for(size_t j = 0; j < img.cols(); ++j) { vertex_data vdat; vdat.rounds = 0; // Set the node potentials vec cent = zeros(2); 
//center of mixture component is around pixel color cent[0] = img.pixel(i,j); cent[1] = img.pixel(i,j); mat cent2 = cent; cent2 = transpose(cent2); vec bw = "30 30"; mat bw2 = bw; bw2 = transpose(bw2); vec wght = "1 1"; //create a mixture vdat.obs = kde(cent2, bw2, wght); vdat.bel = vdat.obs; if (i == 0 && j == 0) vdat.obs.matlab_print(); graph.add_vertex(vdat); vdat.obs.verify(); vdat.bel.verify(); } // end of for j in cols } // end of for i in rows edge_data edata; edata.edge_pot = edge_pot; edata.edge_pot.matlab_print(); //add the edges to the grid graph for(size_t i = 0; i < img.rows(); ++i) { for(size_t j = 0; j < img.cols(); ++j) { size_t vertid = img.vertid(i,j); if(i-1 < img.rows()) { edata.msg = graph.vertex_data(img.vertid(i-1, j)).bel; graph.add_edge(vertid, img.vertid(i-1, j), edata); } if(i+1 < img.rows()) { edata.msg = graph.vertex_data(img.vertid(i+1, j)).bel; graph.add_edge(vertid, img.vertid(i+1, j), edata); } if(j-1 < img.cols()) { edata.msg = graph.vertex_data(img.vertid(i, j-1)).bel; graph.add_edge(vertid, img.vertid(i, j-1), edata); } if(j+1 < img.cols()) { edata.msg = graph.vertex_data(img.vertid(i, j+1)).bel; graph.add_edge(vertid, img.vertid(i, j+1), edata); } } // end of for j in cols } // end of for i in rows graph.finalize(); } // End of construct graph // MAIN =======================================================================> int main(int argc, char** argv) { // set the global logger global_logger().set_log_level(LOG_WARNING); global_logger().set_log_to_console(true); std::string gmmfile= ""; std::string inputfile = ""; // Parse command line arguments ---------------------------------------------> graphlab::command_line_options clopts("NBP image denoising"); clopts.attach_option("epsilon", &EPSILON, EPSILON, "epsilon - product accuracy"); clopts.attach_option("gmmfile", &gmmfile, std::string(""), "true image + self and edge potential file"); clopts.attach_option("inputfile", &inputfile, std::string(""), "the input noisy image"); 
clopts.attach_option("max_iter", &MAX_ITERATIONS, MAX_ITERATIONS, "maximum number of iterations. In this round the belief is compted"); clopts.set_scheduler_type("round_robin"); bool success = clopts.parse(argc, argv); if(!success) { return EXIT_FAILURE; } // load the potentials mixture components it_ifile f(gmmfile.c_str()); mat edgecenter, edgesigma, edgeweight; mat nodecenter, nodesigma, nodeweight; ivec truedata; ivec imgsize; imat integermat; vec doublevec; //read edge potentials f >> Name("edge_ce") >> integermat; edgecenter = to_mat(integermat); f >> Name("edge_alpha") >> doublevec; edgeweight = doublevec; f >> Name("edge_sigma") >> doublevec; edgesigma = doublevec; //read self potential f >> Name("like_ce") >> nodecenter; f >> Name("like_alpha") >> doublevec; nodeweight = doublevec; f >> Name("like_sigma") >> doublevec; nodesigma = doublevec; //read true image f >> Name("img1") >> truedata; //read image size f >> Name("isize") >> imgsize; size_t rows = imgsize(0); size_t cols = imgsize(1); std::cout << "Image size is " << rows << " x " << cols << std::endl; mat edgesigma2 = edgesigma; edgesigma2 = transpose(edgesigma2); mat edgeweight2 = edgeweight; edgeweight2 = transpose(edgeweight2); if (edgesigma2.cols() > edgecenter.cols()) edgesigma2 = edgesigma2(0,0,0,edgecenter.cols()-1); kde edge_pot = kde(edgecenter, edgesigma2, edgeweight2); // convert the true image to an image image trueimg(rows, cols); for (size_t i = 0; i < size_t(truedata.size()); ++i) { trueimg.pixel(i) = truedata(i); } //read noisy image it_ifile imgfile(inputfile.c_str()); vec observations; imgfile >> Name("obs2") >> observations; // convert observations to an image image img(rows, cols); for (size_t i = 0;i < size_t(observations.size()); ++i) { img.pixel(i) = observations(i); } img.save("noisy.pgm"); trueimg.save("source_img.pgm"); // Create the graph ---------------------------------------------------------> gl_types::core core; // Set the engine options core.set_engine_options(clopts); 
std::cout << "Constructing pairwise Markov Random Field. " << std::endl; construct_graph(img, edge_pot, core.graph()); // Running the engine -------------------------------------------------------> core.sched_options().add_option("update_function", nbp_update); std::cout << "Running the engine. " << std::endl; // Add the bp update to all vertices core.add_task_to_all(nbp_update, 100.0); // Starte the engine const double runtime = core.start(); // Saving the output --------------------------------------------------------> std::cout << "Rendering the cleaned image. " << std::endl; //parse belief to find the reconstructed image for(size_t v = 0; v < core.graph().num_vertices(); ++v) { const vertex_data& vdata = core.graph().vertex_data(v); float a = vdata.bel.max(); if (a < 0) a = 0; if (a > 255) a = 255; img.pixel(v) = size_t(a); } double err = sqrt(image_compare_rmse(trueimg, img)); double err2 = image_compare_mae(trueimg, img); img.save("inferred.pgm"); std::cout << "RMSE: " << err << " MAE: "<< err2< #include #define mxMalloc malloc #define mxFree free #define mexErrMsgTxt(a) {printf(a); assert(false); } #define mxDestroyArray #endif ================================================ FILE: toolkits/graphical_models/deprecated/kernelbp/old/image.hpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #ifndef IMAGE_HPP #define IMAGE_HPP #include #include #include #include #include #include #include #include #include #include /** A simple struct represent a gray scale image */ class image { size_t _rows, _cols; std::vector data; public: /** Create an empty image */ image() : _rows(0), _cols(0), data(0,0) { } /** Create an image of a fixed size */ image(size_t rows, size_t cols) : _rows(rows), _cols(cols), data(rows * cols, 0) { } inline void resize(size_t rows, size_t cols); /** Get the number of rows */ inline size_t rows() const { return _rows; } /** Get the number of columns */ inline size_t cols() const { return _cols; } /** get the number of pixels */ inline size_t pixels() const { return _rows * _cols; } /** A function to read a pixel */ inline double& pixel(size_t i, size_t j) { return data[vertid(i,j)]; } inline double pixel(size_t i, size_t j) const { return data[vertid(i,j)]; } /** Linear indexing */ inline double& pixel(size_t i) { return data.at(i); } inline double pixel(size_t i) const { return data.at(i); } /** Get the vertex id of a pixel */ inline size_t vertid(size_t i, size_t j) const; inline static size_t vertid(size_t rows, size_t cols, size_t i, size_t j) { assert(i < rows); assert(j < cols); return i * cols + j; } /** Get the pixel address from the vertex id */ std::pair loc(size_t vertex) const; /** A function to save the image to a file in pgm format */ inline void save(const char* filename, bool autorescale = true, double min=0, double max=255) const; inline void save_vec(const char* filename) const { std::ofstream os(filename); assert(os.good()); for(size_t i = 0; i < pixels(); ++i) { os << pixel(i) << "\n"; } os.flush(); os.close(); } /** paint a beautiful sunset */ void paint_sunset(size_t num_rings); /** Add random noise to the image */ void corrupt(double sigma); inline double min() { return *std::min_element(data.begin(), data.end()); } inline double max() { return *std::max_element(data.begin(), data.end()); } inline void 
save(graphlab::oarchive &oarc) const { oarc << _rows; oarc << _cols; oarc << data; } inline void load(graphlab::iarchive &iarc) { iarc >> _rows; iarc >> _cols; iarc >> data; } }; /** Generate a normally distributed random number N(mu, sigma^2) */ // std::pair randn(double mu = 0, double sigma = 1 ); // IMPLEMENTATION =============================================================> inline void image::resize(size_t rows, size_t cols) { _rows = rows; _cols = cols; data.resize(rows * cols, 0); } /** Get the vertex id of a pixel */ inline size_t image::vertid(size_t i, size_t j) const { assert(i < _rows); assert(j < _cols); return i * _cols + j; } // static size_t image::vertid(size_t rows, size_t cols, size_t i, size_t j) { // assert(i < rows); // assert(j < cols); // return i * cols + j; // } /** Get the vertex id of a pixel */ inline std::pair image::loc(size_t vertexid) const { assert(vertexid < _rows * _cols); return std::make_pair( vertexid / _cols, vertexid % _cols); } inline void image::save(const char* filename, bool autorescale, double min_, double max_) const { assert(_rows > 0 && _cols > 0); std::ofstream os(filename); os << "P2" << std::endl << _cols << " " << _rows << std::endl << 255 << std::endl; // Compute min and max pixel intensities double min = data[0]; double max = data[0]; if (autorescale) { for(size_t i = 0; i < _rows * _cols; ++i) { min = std::min(min, data[i]); max = std::max(max, data[i]); } } else { min = min_; max = max_; } // Save the image (rescaled) for(size_t r = 0; r < _rows; ++r) { for(size_t c = 0; c < _cols; c++) { if(min != max) { int color = static_cast(255.0 * (pixel(r,c) - min)/(max-min)); if (color < 0) color = 0; if (color > 255) color = 255; os << color; } else { os << min; } if(c != _cols-1) os << "\t"; } os << std::endl; } os.flush(); os.close(); } // end of save inline void image::paint_sunset(size_t num_rings) { const double center_r = rows() / 2.0; const double center_c = cols() / 2.0; const double max_radius = 
std::min(rows(), cols()) / 2.0; // Fill out the image for(size_t r = 0; r < rows(); ++r) { for(size_t c = 0; c < cols(); ++c) { double distance = sqrt((r-center_r)*(r-center_r) + (c-center_c)*(c-center_c)); // If on top of image if(r < rows() / 2) { // Compute ring of sunset size_t ring = static_cast(std::floor(std::min(1.0, distance/max_radius) * (num_rings - 1) ) ); pixel(r,c) = ring; } else { pixel(r,c) = 0; } } } } // end of paint_beatiful_sunset /** corrupt the image with gaussian noise */ inline void image::corrupt(double sigma) { // boost::mt19937 rng; boost::lagged_fibonacci607 rng; boost::normal_distribution noise_model(0, sigma); for(size_t i = 0; i < rows() * cols(); ) { // Corrupt two pixels at a time. pixel(i++) += noise_model(rng); } } // end of corrupt_image // /** generate a normally distributed iid pair */ // std::pair randn(double mu , double sigma ) { // // Generate a N(0,1) from a Unif(0,1) using Box-Muller generator: // double u1 = static_cast(rand()) / RAND_MAX; // double u2 = static_cast(rand()) / RAND_MAX; // double coeff = std::sqrt(-2.0 * std::log(u1)); // double n1 = coeff * std::cos(2.0 * M_PI * u2) ; // double n2 = coeff * std::sin(2.0 * M_PI * u2) ; // // Adjust for mean and variance // n1 = sigma * n1 + mu; // n2 = sigma * n2 + mu; // return std::make_pair(n1, n2); // } // end of randn #include #endif ================================================ FILE: toolkits/graphical_models/deprecated/kernelbp/old/kde.h ================================================ #ifndef __KDE_H #define __KDE_H #include #include #include "assert.h" #include /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * * Any changes to the code must include this original license notice in full. * KDE code written by Danny Bickson, CMU * Based on Matlab code by Alex Ihler, UC Irvine */ typedef itpp::Mat uimat; inline double square(const double v) { return v * v; } /** * function for computing the log likelihood of N(u, sigma^2) at location x * */ inline double log_likelihood(double u, double sigma, double x){ return log(1.0/sigma) - square(x - u) / (2 * sigma * sigma); } /** * class for storing a KDE */ class kde{ public: //mixture centers itpp::mat centers; //mixture bandwidths itpp::mat bw; //mixture weights itpp::vec weights; //selected mixture indices itpp::imat indices; kde(){}; kde(itpp::mat &_centers, itpp::mat &_bw, itpp::vec &_weights){ centers = _centers; bw = _bw; weights = _weights; normalize_weights(); } kde(double _center, double _bw, double _weight){ centers = itpp::zeros(1,1); centers.set(0,0,_center); bw = itpp::zeros(1,1); bw.set(0,0,_bw); weights = itpp::vec(1); weights[0] = _weight; normalize_weights(); } kde(const char * _centers, const char * _bw, const char * _weights){ centers = itpp::mat(_centers); bw = itpp::mat(_bw); weights = itpp::vec(_weights); normalize_weights(); } kde(itpp::mat &_centers, itpp::mat &_bw, itpp::mat &_weights){ centers = _centers; bw = _bw; assert(_weights.rows() == 1); weights = _weights.get_row(0); normalize_weights(); } kde(itpp::mat &_centers, itpp::mat &_bw){ centers = _centers; bw = _bw; } /* * compute marginal along a certain dimension */ kde marginal(int dim){ assert(dim < centers.rows()); assert(dim >= 0); itpp::mat slice 
= centers(dim, dim, 0, centers.cols()-1); itpp::mat bwslice = bw; if (bw.rows() > 1) bwslice = bw(dim, dim, 0, bw.cols() - 1); return kde(slice, bwslice, weights); } /** * normalize mixture weights to sum into 1 */ void normalize_weights(){ double sum = itpp::sum(weights); assert(sum > 0); weights = weights/ sum; } /* * find the maximal mixture component */ double max() const{ assert(getDim() == 1); double max = -1e100; int pos = -1; for (int i=0; i= 0); return centers(pos); } /* * compute likelihood of a mixutre which is composed from a weighted sum of likelihood of mixture components */ double likelihood(const double &d) const{ double ret = 0; for (size_t j = 0;j < (size_t)getPoints(); ++j) { double curll = log_likelihood(centers(j), bw(j), d); ret += weights(j) * exp(curll); } return ret; } /* * verify mixture params */ void verify() const{ assert(sum(weights) > 0); assert(sumsum(bw) > 0); assert(itpp::min(itpp::min(bw))>0); assert(centers.rows() > 0); assert(centers.cols() > 0); assert(itpp::max(itpp::max(centers)) < 1e10); assert(itpp::max(itpp::max(bw)) < 1e10); assert(centers.size() < 1000); assert(weights.size() == getPoints()); assert(bw.cols() == centers.cols()); assert(weights.size() == centers.cols()); if (indices.size() > 0){ assert(itpp::max(indices.get_row(0))< 10*getPoints());//TODO more careful checkiung; assert(itpp::min(indices.get_row(0))>=0); } } /* * return the number of dimensions */ int getDim() const{ return centers.rows(); /* return the number of mixture components */ } int getPoints() const{ return centers.cols(); } /* sample from a mixture using the specified indices */ // points = pts(:,ind) + getBW(npd,ind).*randKernel(getDim(npd),length(ind),getType(npd)); kde sample(itpp::imat & ind,itpp::vec & _weights){ assert(sum(_weights)>0); assert(itpp::max(itpp::max(ind)) < centers.cols()); assert(itpp::min(itpp::min(ind)) >= 0); assert(itpp::max(itpp::max(ind)) < getPoints()); itpp::mat randN; itpp::randn(getDim(), ind.size(), randN); 
itpp::mat pts = itpp::zeros(centers.rows(), ind.size()); itpp::mat pbw = itpp::zeros(centers.rows(), ind.size()); for (int i=0; i< centers.rows(); i++){ for (int j=0; j< ind.size(); j++){ pts.set(i,j,centers(i,ind(j))); pbw.set(i,j,bw(ind(j))); } } itpp::mat points = pts + elem_mult(pbw, randN); return kde(points, pbw, _weights); } /* sample from a mixture using random indices */ kde sample(){ itpp::ivec ind2 = itpp::randi(getPoints(), 0, getPoints() -1); itpp::imat mind2(1,getPoints()); for (int i=0; i< getPoints(); i++) mind2(0,i) = ind2(i); itpp::vec weights2 = itpp::ones(getPoints()); return sample(mind2, weights2); } /* debugging functions */ static void matlab_print(const itpp::ivec & data){ for (int i=0; i< data.size(); i++) std::cout<<" "< 0) std::cout<<"indices=["; matlab_print(indices); std::cout< 1); //no meaning to compute variance over one point double prop = 1.0; double sig = sqrt(itpp::variance(centers.get_row(0))); assert(!std::isnan(sig)); assert(sig > 0); double h = prop*sig*powf(getPoints(),(-1.0/(4.0+getDim()))); bw = itpp::ones(1,getPoints()) * h; } }; /* unit testing */ /****************/ inline void test_marginal(){ printf("testing marginal..\n"); itpp::mat mcenters = "1 2 3; 3 2 1"; itpp::mat mbw = "0.5 0.5 0.2; 0.5 0.5 0.2"; itpp::vec weights = "0.2 0.3 0.4"; kde k = kde(mcenters, mbw, weights); assert(k.getDim() == 2); assert(k.getPoints() == 3); k.verify(); k.matlab_print(); kde k1 = k.marginal(0); k1.matlab_print(); assert(k1.centers.get_row(0) == itpp::vec(" 1 2 3")); assert(k1.bw.get_row(0) == itpp::vec(".5 .5 .2")); assert(square(k1.weights(0) - 0.22222) < 1e-8); kde k2 = k.marginal(1); assert(k2.centers.get_row(0) == itpp::vec(" 3 2 1")); assert(k2.bw.get_row(0) == itpp::vec(".5 .5 .2")); assert(square(k2.weights(0) - 0.22222) < 1e-8); k2.matlab_print(); } inline void test_max(){ printf("testing max..\n"); itpp::mat mcenters = " 1 2 3 -1 -2 3 2 1"; itpp::mat mbw = "0.5 0.5 0.2 0.5 0.5 0.2 3 2"; itpp::vec weights = "0.2 0.3 0.4 0.1 
0.05 0.05 0.05 0.05"; kde k = kde(mcenters, mbw, weights); k.matlab_print(); std::cout< //#include //#include #define pi 3.14152965 #include "graphlab/util/random.hpp" using namespace itpp; using namespace std; void randv(int n, vec & ret){ assert(n>=1); for (int i=0; i< n; i++) //ret[i] = drand48(); ret[i] = graphlab::random::rand01(); } mat randn1(int Dx, int Dy){ if (Dx == 0) Dx = 1; assert(Dy>=1); mat ret = zeros(Dx,Dy); vec us = zeros(ceil(Dx*Dy/2.0)*2); randv(ceil(Dx*Dy/2.0)*2, us); int k=0; for (int i=0; i #include #include #include "kde.h" #include "cpp/BallTreeDensity.h" #include "prob.hpp" class prodSampleEpsilon{ public: // a little addressing formula: // to access a^th dimension of density pair (b,c)'s constant #define SIGVALSMAX(a,b,c) (SigValsMax + a+Ndim*b+Ndim*Ndens*c) #define SIGVALSMIN(a,b,c) (SigValsMin + a+Ndim*b+Ndim*Ndens*c) double *SigValsMax, *SigValsMin; //BallTreeDensity *trees; // structure of all trees std::vector trees; BallTree::index *ind; // indices of this level of the trees double *C,*sC,*M; double *randunif1, *randunif2, *randnorm; // required random numbers double *samples; BallTree::index* indices; // return data double maxErr; // epsilon tolerance (%) of algorithm double total, soFar, soFarMin; // partition f'n and accumulation unsigned int Ndim,Ndens; // useful constants unsigned long Nsamp; bool bwUniform ; prodSampleEpsilon(){ SigValsMin = SigValsMax = 0; ind = 0; C = sC = M = 0; randunif2 = randunif1 = randnorm = 0; samples = 0; indices = 0; maxErr = 0; total = 0; soFarMin =0; soFar = 0; Ndim = 0; Ndens = 0; Nsamp = 0; bwUniform = true; } ~prodSampleEpsilon(){ mxFree(C); mxFree(sC); mxFree(M); mxFree(SigValsMin); mxFree(SigValsMax); } #ifdef MEX ////////////////////////////////////////////////////////////////////// // MEX WRAPPER ////////////////////////////////////////////////////////////////////// void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { mxArray *rNorm, *rUnif1, *rUnif2, *rsize; 
unsigned int i,j; /********************************************************************* ** Verify arguments and initialize variables *********************************************************************/ if (nrhs != 3) mexErrMsgTxt("Takes 3 input arguments"); if (nlhs > 2) mexErrMsgTxt("Outputs 2 results"); Ndens = mxGetN(prhs[0]); // get # of densities // trees = new BallTreeDensity[Ndens]; trees = (BallTreeDensity*) mxMalloc(Ndens*sizeof(BallTreeDensity)); bwUniform = true; bool allGaussians = true; for (i=0;i& kdes) { unsigned int i;//,j; /********************************************************************* ** Verify arguments and initialize variables *********************************************************************/ bool debug = false; Ndens = _Ndens; Nsamp = _Nsamp; maxErr = _maxErr; assert(Ndens >= 1); assert(Nsamp>= 1); // get # of densities //trees = new BallTreeDensity[Ndens]; //trees = (BallTreeDensity*) mxMalloc(Ndens*sizeof(BallTreeDensity)); bwUniform = true; bool allGaussians = true; for (i=0;i mysort; mysort.sort(0,Nsamp,rUnif1); randunif1 = vec2vec(&rUnif1); itpp::vec rUnif2 = zeros(Nsamp*Nsamp); randv(Ndens*Nsamp, rUnif2); randunif2 = vec2vec(&rUnif2); itpp::mat rNorm = zeros(Ndim*Nsamp); rNorm = randn1(Ndim, Nsamp); randnorm = vec2vec(&rNorm); kde out; out.centers = itpp::zeros(Ndim, Nsamp); samples = out.centers._data(); out.indices = itpp::imat(Ndens, Nsamp); indices = (BallTree::index*) out.indices._data(); SigValsMax = (double*) mxMalloc(Ndim*Ndens*Ndens*sizeof(double)); // precalc'd constants SigValsMin = (double*) mxMalloc(Ndim*Ndens*Ndens*sizeof(double)); // precalc'd constants C = (double*) mxMalloc(Ndim*sizeof(double)); sC = (double*) mxMalloc(Ndim*sizeof(double)); M = (double*) mxMalloc(Ndim*sizeof(double)); total = -1; soFar = soFarMin = 0; multiEval(); // calculate total weight total = soFar; soFar = soFarMin = 0; multiEval(); // then sample out.ROT(); out.weights = itpp::ones(1, out.getPoints())/(double)out.getPoints(); if 
(debug) out.matlab_print(); //else {printf("."); fflush(NULL);} out.indices = out.indices - 1; //c++ count starts from zero out.verify(); for (size_t i=0; i < trees.size(); i++) trees[i].clean(); trees.clear(); return out; } #endif double normConstant(void) { unsigned int i,j; double tmp, normConst; //const double pi = 3.141592653589; normConst = 1; // precalculate influence of normalization tmp = pow(2*pi,((double)Ndim)/2); for (i=0;i 0); double *SigNormMin = (double*) mxMalloc(Ndim*sizeof(double)); double *SigNormMax = (double*) mxMalloc(Ndim*sizeof(double)); for (i=0;i maxDiscrep) { // also find which pair maxDiscrep = maxValT - minValT; // has the largest maxInd0=i; maxInd1=j; // discrepancy (A/B) } } allLeaves = allLeaves && trees[i].isLeaf(ind[i]); } maxVal = exp(maxVal); minVal = exp(minVal); // If the approximation is good enough, if (allLeaves || fabs(maxVal - minVal) <= maxErr * (soFarMin+minVal) ) { // APPROXIMATE double add = (maxVal + minVal)/2; // compute contribution for (i=0;i *randunif2) break; } randunif2++; for (j=0;j size0) size0 = trees[maxInd0].range(ind[maxInd0])[k]; for(BallTree::index k=0; k size1) size1 = trees[maxInd1].range(ind[maxInd1])[k]; split = (size0 > size1) ? maxInd0 : maxInd1; // take the largest. 
BallTree::index current = ind[split]; if (!trees[split].isLeaf(current)) { ind[split] = trees[split].left(current); multiEvalRecursive(); // recurse left ind[split] = trees[split].right(current); // and right tree multiEvalRecursive(); // restore indices ind[split] = current; // for calling f'n } } } void multiEval(void) { unsigned int i,j;//,k; // ind = new BallTree::index[Ndens]; // construct index array assert(Ndens>0); ind = (BallTree::index*) mxMalloc(Ndens*sizeof(BallTree::index)); // construct index array memset(ind, 0, Ndens * sizeof(BallTree::index)); for (i=0;i vecs; vecs.push_back(k); vecs.push_back(j); prodSampleEpsilon prod; kde out = prod.prodSampleEpsilonRun(2,48,1e-5,vecs); out.matlab_print(); out.verify(); } #endif ================================================ FILE: toolkits/graphical_models/deprecated/loopybp_denoise.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * This file contains an example of graphlab used for discrete loopy * belief propagation in a pairwise markov random field to denoise a * synthetic noisy image. 
* * \author Joseph Gonzalez */ // INCLUDES ===================================================================> // Including Standard Libraries #include #include #include #include #include #include #include #include #include #include #include // #include "image.hpp" #include "factors/factor_includes.hpp" #include #include // Include the macro for the for each operation #include // Global variables binary_factor EDGE_FACTOR; size_t NCOLORS; double SIGMA; double BOUND; double DAMPING; // STRUCTS (Edge and Vertex data) =============================================> /** * The data associated with each variable in the pairwise markov * random field */ struct vertex_data : graphlab::IS_POD_TYPE { float obs_color; uint16_t true_color, pred_color; vertex_data(float obs_color = 0, uint16_t true_color = 0) : obs_color(obs_color), true_color(true_color), pred_color(obs_color) { } }; // End of vertex data /** * The data associated with each directed edge in the pairwise markov * random field */ class edge_data { unary_factor messages[4]; size_t message_idx(size_t source_id, size_t target_id, bool is_new) { return size_t(source_id < target_id) + 2 * size_t(is_new); } public: edge_data() { } edge_data(graphlab::vertex_id_type v1, graphlab::vertex_id_type v2, size_t ncolors) { for(size_t i = 0; i < 4; ++i) { messages[i].resize(ncolors); messages[i].uniform(); } message(v1, v2).var() = v2; old_message(v1, v2).var() = v2; message(v2, v1).var() = v1; old_message(v2, v1).var() = v1; } // end of constructor unary_factor& message(size_t source_id, size_t target_id) { return messages[message_idx(source_id, target_id, true)]; } unary_factor& old_message(size_t source_id, size_t target_id) { return messages[message_idx(source_id, target_id, false)]; } void update_old(size_t source_id, size_t target_id) { old_message(source_id, target_id) = message(source_id, target_id); } void save(graphlab::oarchive& arc) const { for(size_t i = 0; i < 4; ++i) arc << messages[i]; } void 
load(graphlab::iarchive& arc) { for(size_t i = 0; i < 4; ++i) arc >> messages[i]; } }; // End of edge data /** * The type of the distributed graph representing the MRF. */ typedef graphlab::distributed_graph graph_type; /** * The gather_type for the vertex program needs to compute *= in place * of += so we create a new type which convertes computes *= for +=. */ struct factor_product { unary_factor factor; factor_product(const unary_factor& factor = unary_factor()) : factor(factor) { } factor_product& operator+=(const factor_product& other) { ASSERT_EQ(factor.arity(), other.factor.arity()); factor *= other.factor; return *this; } void save(graphlab::oarchive& arc) const { arc << factor; } void load(graphlab::iarchive& arc) { arc >> factor; } }; // end of struct factor product /** * Belief Propagation Vertex Program * */ class bp_vertex_program : public graphlab::ivertex_program< graph_type, factor_product, graphlab::messages::sum_priority > { private: /** * The belief estimate for this vertex program */ unary_factor belief; public: void save(graphlab::oarchive& arc) const { arc << belief; } void load(graphlab::iarchive& arc) { arc >> belief; } /** * Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges /** * Update the old message to be the new message and collect the * message value. */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_type other_vertex = get_other_vertex(edge, vertex); edge_data& edata = edge.data(); edata.update_old(other_vertex.id(), vertex.id()); return factor_product(edata.old_message(other_vertex.id(), vertex.id())); }; // end of gather function /** * Multiply message product by node potential and update the belief. 
*/ void apply(icontext_type& context, vertex_type& vertex, const gather_type& total) { // construct the node potential belief = make_potential(vertex); ASSERT_EQ(belief.arity(), total.factor.arity()); // multiply in the rest of the message product; belief *= total.factor; belief.normalize(); // compute the predicted value vertex.data().pred_color = belief.max_asg(); }; // end of apply /** * Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of scatter edges /** * Compute new message value for each edge. */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_type other_vertex = get_other_vertex(edge, vertex); edge_data& edata = edge.data(); // construct the cavity unary_factor cavity = belief; cavity /= edata.old_message(other_vertex.id(), vertex.id()); cavity.normalize(); // compute the new message unary_factor& new_message = edata.message(vertex.id(), other_vertex.id()); const unary_factor& old_message = edata.old_message(vertex.id(), other_vertex.id()); ASSERT_NE(&new_message, &old_message); new_message.convolve(EDGE_FACTOR, cavity); new_message.normalize(); new_message.damp(old_message, DAMPING); // Compute message residual const double residual = new_message.residual(old_message); context.clear_gather_cache(other_vertex); // Schedule the adjacent vertex if(residual > BOUND) context.signal(other_vertex, residual); }; // end of scatter private: /** * Construct the unary evidence potential */ unary_factor make_potential(const vertex_type& vertex) const { unary_factor potential(vertex.id(), NCOLORS); const double obs = vertex.data().obs_color; const double sigmaSq = SIGMA*SIGMA; for(size_t pred = 0; pred < potential.arity(); ++pred) { potential.logP(pred) = -(obs - pred)*(obs - pred) / (2.0 * sigmaSq); } potential.normalize(); return potential; } // end of 
make_potentail /** * Return the other vertex */ vertex_type get_other_vertex(edge_type& edge, const vertex_type& vertex) const { return vertex.id() == edge.source().id()? edge.target() : edge.source(); }; // end of other_vertex }; // end of class bp_vertex_program /** * Define the engine type */ //typedef graphlab::synchronous_engine engine_type; typedef graphlab::async_consistent_engine engine_type; /** * construct the synthetic image graph. */ void create_synthetic_mrf(graphlab::distributed_control& dc, graph_type& graph, const size_t rows, const size_t cols); template struct merge_reduce { std::vector values; void save(graphlab::oarchive& arc) const { arc << values; } void load(graphlab::iarchive& arc) { arc >> values; } merge_reduce& operator+=(const merge_reduce& other) { values.insert(values.end(), other.values.begin(), other.values.end()); return *this; } }; // end of merge_reduce typedef std::pair pred_pair_type; typedef merge_reduce merge_reduce_type; merge_reduce_type pred_map_function(graph_type::vertex_type vertex) { merge_reduce ret; ret.values.push_back(pred_pair_type(vertex.id(), vertex.data().pred_color)); return ret; } // end of pred_map_function merge_reduce_type obs_map_function(graph_type::vertex_type vertex) { merge_reduce ret; ret.values.push_back(pred_pair_type(vertex.id(), vertex.data().obs_color)); return ret; } // end of obs_map_function /** * Save the image data in the vector of pairs to an image file */ void save_image(const size_t rows, const size_t cols, const std::vector& values, const std::string& fname); // MAIN =======================================================================> int main(int argc, char** argv) { std::cout << "This program creates and denoises a synthetic " << std::endl << "image using loopy belief propagation inside " << std::endl << "the graphlab framework." 
<< std::endl; // // set the global logger // global_logger().set_log_level(LOG_WARNING); // global_logger().set_log_to_console(true); // Set initial values for members -------------------------------------------> NCOLORS = 5; SIGMA = 2; BOUND = 1E-4; DAMPING = 0.1; size_t nrows = 200; size_t ncols = 200; double lambda = 2; std::string smoothing = "laplace"; std::string orig_fn = "source_img.jpeg"; std::string noisy_fn = "noisy_img.jpeg"; std::string pred_fn = "pred_img.jpeg"; // std::string orig_fn = "source_img.pgm"; // std::string noisy_fn = "noisy_img.pgm"; // std::string pred_fn = "pred_img.pgm"; // Parse command line arguments ---------------------------------------------> graphlab::command_line_options clopts("Loopy BP image denoising"); clopts.attach_option("bound", &BOUND, BOUND, "Residual termination bound"); clopts.attach_option("damping", &DAMPING, DAMPING, "The amount of message damping (higher = more damping)"); clopts.attach_option("ncolors", &NCOLORS, NCOLORS, "The number of colors in the noisy image"); clopts.attach_option("sigma", &SIGMA, SIGMA, "Standard deviation of noise."); clopts.attach_option("nrows", &nrows, nrows, "The number of rows in the noisy image"); clopts.attach_option("ncols", &ncols, ncols, "The number of columns in the noisy image"); clopts.attach_option("lambda", &lambda, lambda, "Smoothness parameter (larger => smoother)."); clopts.attach_option("smoothing", &smoothing, smoothing, "Options are {square, laplace}"); clopts.attach_option("orig", &orig_fn, orig_fn, "Original image file name."); clopts.attach_option("noisy", &noisy_fn, noisy_fn, "Noisy image file name."); clopts.attach_option("pred", &pred_fn, pred_fn, "Predicted image file name."); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); const bool success = clopts.parse(argc, argv); if(!success) { graphlab::mpi_tools::finalize(); return EXIT_FAILURE; } ///! Create a distributed control object graphlab::distributed_control dc; ///! 
display settings if(dc.procid() == 0) { std::cout << "ncpus: " << clopts.get_ncpus() << std::endl << "bound: " << BOUND << std::endl << "damping: " << DAMPING << std::endl << "colors: " << NCOLORS << std::endl << "nrows: " << nrows << std::endl << "ncols: " << ncols << std::endl << "sigma: " << SIGMA << std::endl << "lambda: " << lambda << std::endl << "smoothing: " << smoothing << std::endl << "scheduler: " << clopts.get_scheduler_type() << std::endl << "orig_fn: " << orig_fn << std::endl << "noisy_fn: " << noisy_fn << std::endl << "pred_fn: " << pred_fn << std::endl; } // Create synthetic images --------------------------------------------------> std::cout << "Creating a synthetic noisy image." << std::endl; graph_type graph(dc, clopts); create_synthetic_mrf(dc, graph, nrows, ncols); std::cout << "Finalizing the graph." << std::endl; graph.finalize(); if(dc.procid() == 0) { std::cout << "Number of vertices: " << graph.num_vertices() << std::endl << "Number of edges: " << graph.num_edges() << std::endl; } std::cout << "Collect the noisy image. " << std::endl; merge_reduce_type obs_image = graph.map_reduce_vertices(obs_map_function); std::cout << "saving the noisy image." << std::endl; if(dc.procid() == 0) { save_image(nrows, ncols, obs_image.values, noisy_fn); } // Initialze the edge factor -----------------------------------------------> std::cout << "Initializing shared edge agreement factor. " << std::endl; // dummy variables 0 and 1 and num_rings by num_rings EDGE_FACTOR = binary_factor(0, NCOLORS, 0, NCOLORS); // Set the smoothing type if(smoothing == "square") { EDGE_FACTOR.set_as_agreement(lambda); } else { EDGE_FACTOR.set_as_laplace(lambda); } if(dc.procid() == 0) std::cout << EDGE_FACTOR << std::endl; // Create the engine --------------------------------------------------------> std::cout << "Creating the engine. 
" << std::endl; engine_type engine(dc, graph, clopts); std::cout << "Scheduling all vertices" << std::endl; engine.signal_all(); std::cout << "Starting the engine" << std::endl; engine.start(); const float runtime = engine.elapsed_seconds(); size_t update_count = engine.num_updates(); std::cout << "Finished Running engine in " << runtime << " seconds." << std::endl << "Total updates: " << update_count << std::endl << "Efficiency: " << (double(update_count) / runtime) << " updates per second " << std::endl; // Saving the output --------------------------------------------------------> std::cout << "Saving the predicted image" << std::endl; std::cout << "Collect the noisy image. " << std::endl; merge_reduce_type pred_image = graph.map_reduce_vertices(pred_map_function); std::cout << "saving the pred image." << std::endl; if(dc.procid() == 0) { save_image(nrows, ncols, pred_image.values, pred_fn); } std::cout << "Done!" << std::endl; graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main graphlab::vertex_id_type sub2ind(size_t rows, size_t cols, size_t r, size_t c) { return r * cols + c; }; // end of sub2ind std::pair ind2sub(size_t rows, size_t cols, size_t ind) { return std::make_pair(ind / cols, ind % cols); }; // end of sub2ind void create_synthetic_mrf(graphlab::distributed_control& dc, graph_type& graph, const size_t rows, const size_t cols) { dc.barrier(); const double center_r = rows / 2.0; const double center_c = cols / 2.0; const double max_radius = std::min(rows, cols) / 2.0; for(size_t r = dc.procid(); r < rows; r += dc.numprocs()) { for(size_t c = 0; c < cols; ++c) { // Compute the true pixel value const double distance = sqrt((r-center_r)*(r-center_r) + (c-center_c)*(c-center_c)); // Compute ring of sunset const uint16_t ring_color = std::floor(std::min(1.0, distance/max_radius) * (NCOLORS - 1) ); // Compute the true pixel color by masking with the horizon const uint16_t true_color = r < rows/2 ? 
ring_color : 0; // compute the predicted color const float obs_color = true_color + graphlab::random::normal(0, SIGMA); // determine the true pixel id const graphlab::vertex_id_type vid = sub2ind(rows,cols,r,c); const vertex_data vdata(obs_color, true_color); graph.add_vertex(vid, vdata); // Add the edges if(r + 1 < rows) graph.add_edge(vid, sub2ind(rows,cols,r+1,c), edge_data(vid, sub2ind(rows,cols,r+1,c), NCOLORS)); if(c + 1 < cols) graph.add_edge(vid, sub2ind(rows,cols,r,c+1), edge_data(vid, sub2ind(rows,cols,r,c+1), NCOLORS)); } // end of loop over cols } // end of loop over rows dc.barrier(); }; // end of create synthetic mrf // void save_image(const size_t rows, const size_t cols, // const std::vector& values, // const std::string& fname) { // std::cout << "NPixels: " << values.size() << std::endl; // image img(rows, cols); // foreach(pred_pair_type pair, values) // img.pixel(pair.first) = pair.second; // img.save(fname); // } void save_image(const size_t rows, const size_t cols, const std::vector& values, const std::string& fname) { std::cout << "NPixels: " << values.size() << std::endl; // determine the max and min colors float max_color = -std::numeric_limits::max(); float min_color = std::numeric_limits::max(); foreach(pred_pair_type pair, values) { max_color = std::max(max_color, pair.second); min_color = std::min(min_color, pair.second); } cv::Mat img(cols, rows, CV_8UC1); foreach(pred_pair_type pair, values) { std::pair coords = ind2sub(rows,cols, pair.first); float value = (pair.second - min_color) / (max_color - min_color); int color = 255 * value > 255 ? 255 : 255 * value; img.at(coords.first, coords.second) = color; } cv::imwrite(fname, img); } ================================================ FILE: toolkits/graphical_models/eigen_serialization.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #include "eigen_serialization.hpp" graphlab::oarchive& operator<<(graphlab::oarchive& arc, const Eigen::VectorXd& vec) { typedef Eigen::VectorXd::Index index_type; typedef Eigen::VectorXd::Scalar scalar_type; const index_type size = vec.size(); arc << size; graphlab::serialize(arc, vec.data(), size * sizeof(scalar_type)); return arc; } // end of save vector graphlab::iarchive& operator>>(graphlab::iarchive& arc, Eigen::VectorXd& vec) { typedef Eigen::VectorXd::Index index_type; typedef Eigen::VectorXd::Scalar scalar_type; index_type size = 0; arc >> size; vec.resize(size); graphlab::deserialize(arc, vec.data(), size * sizeof(scalar_type)); return arc; } // end of save vector graphlab::oarchive& operator<<(graphlab::oarchive& arc, const Eigen::MatrixXd& mat) { typedef Eigen::MatrixXd::Index index_type; typedef Eigen::MatrixXd::Scalar scalar_type; const index_type rows = mat.rows(); const index_type cols = mat.cols(); arc << rows << cols; graphlab::serialize(arc, mat.data(), rows*cols*sizeof(scalar_type)); return arc; } // end of save matrix graphlab::iarchive& operator>>(graphlab::iarchive& arc, Eigen::MatrixXd& mat) { typedef Eigen::MatrixXd::Index index_type; typedef Eigen::MatrixXd::Scalar scalar_type; index_type rows=0, cols=0; arc >> rows >> cols; mat.resize(rows,cols); graphlab::deserialize(arc, mat.data(), rows*cols*sizeof(scalar_type)); return arc; } // end of load matrix ================================================ 
FILE: toolkits/graphical_models/eigen_serialization.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 *
 */

#ifndef EIGEN_SERIALIZATION_HPP
#define EIGEN_SERIALIZATION_HPP

// NOTE(review): the two include targets below were stripped during text
// extraction -- presumably the graphlab serialization headers and an Eigen
// header. TODO confirm against the repository.
#include
#include

// Out-of-place save of a dense Eigen vector: write the element count first,
// then the coefficient array as one raw byte block (vec.data() is used as a
// flat buffer of 'size' scalars).
BEGIN_OUT_OF_PLACE_SAVE(arc, Eigen::VectorXd, vec) {
  typedef Eigen::VectorXd::Index index_type;
  typedef Eigen::VectorXd::Scalar scalar_type;
  const index_type size = vec.size();
  arc << size;
  graphlab::serialize(arc, vec.data(), size * sizeof(scalar_type));
} END_OUT_OF_PLACE_SAVE()

// Out-of-place load of a dense Eigen vector written by the save above.
BEGIN_OUT_OF_PLACE_LOAD(arc, Eigen::VectorXd, vec) {
  typedef Eigen::VectorXd::Index index_type;
  typedef Eigen::VectorXd::Scalar scalar_type;
  index_type size = 0;
  arc >> size;
  // resize before the raw read so vec.data() has room for 'size' scalars
  vec.resize(size);
  graphlab::deserialize(arc, vec.data(), size * sizeof(scalar_type));
} END_OUT_OF_PLACE_LOAD()

// Out-of-place save of a dense Eigen matrix: both dimensions, then the
// coefficient array as raw bytes in Eigen's internal storage order.
BEGIN_OUT_OF_PLACE_SAVE(arc, Eigen::MatrixXd, mat) {
  typedef Eigen::MatrixXd::Index index_type;
  typedef Eigen::MatrixXd::Scalar scalar_type;
  const index_type rows = mat.rows();
  const index_type cols = mat.cols();
  arc << rows << cols;
  graphlab::serialize(arc, mat.data(), rows*cols*sizeof(scalar_type));
} END_OUT_OF_PLACE_SAVE()

// Out-of-place load of a dense Eigen matrix written by the save above.
BEGIN_OUT_OF_PLACE_LOAD(arc, Eigen::MatrixXd, mat) {
  typedef Eigen::MatrixXd::Index index_type;
  typedef Eigen::MatrixXd::Scalar scalar_type;
  index_type rows=0, cols=0;
  arc >> rows >> cols;
  // resize before the raw read so mat.data() has room for rows*cols scalars
  mat.resize(rows,cols);
  graphlab::deserialize(arc, mat.data(), rows*cols*sizeof(scalar_type));
} END_OUT_OF_PLACE_LOAD()

// Dead code kept for reference: the free-operator form of the serializers,
// superseded by the OUT_OF_PLACE macros above (definitions now live in
// eigen_serialization.cpp).
// inline graphlab::oarchive& operator<<(graphlab::oarchive& arc, const Eigen::VectorXd& vec) {
//   typedef Eigen::VectorXd::Index index_type;
//   typedef Eigen::VectorXd::Scalar scalar_type;
//   const index_type size = vec.size();
//   arc << size;
//   graphlab::serialize(arc, vec.data(), size * sizeof(scalar_type));
//   return arc;
// } // end of save vector
// inline graphlab::iarchive& operator>>(graphlab::iarchive& arc, Eigen::VectorXd& vec) {
//   typedef Eigen::VectorXd::Index index_type;
//   typedef Eigen::VectorXd::Scalar scalar_type;
//   index_type size = 0;
//   arc >> size;
//   vec.resize(size);
//   graphlab::deserialize(arc, vec.data(), size * sizeof(scalar_type));
//   return arc;
// } // end of save vector
// inline graphlab::oarchive& operator<<(graphlab::oarchive& arc, const Eigen::MatrixXd& mat) {
//   typedef Eigen::MatrixXd::Index index_type;
//   typedef Eigen::MatrixXd::Scalar scalar_type;
//   const index_type rows = mat.rows();
//   const index_type cols = mat.cols();
//   arc << rows << cols;
//   graphlab::serialize(arc, mat.data(), rows*cols*sizeof(scalar_type));
//   return arc;
// } // end of save matrix
// inline graphlab::iarchive& operator>>(graphlab::iarchive& arc, Eigen::MatrixXd& mat) {
//   typedef Eigen::MatrixXd::Index index_type;
//   typedef Eigen::MatrixXd::Scalar scalar_type;
//   index_type rows=0, cols=0;
//   arc >> rows >> cols;
//   mat.resize(rows,cols);
//   graphlab::deserialize(arc, mat.data(), rows*cols*sizeof(scalar_type));
//   return arc;
// } // end of load matrix

#endif
================================================
FILE: toolkits/graphical_models/factors/CMakeLists.txt
================================================
project(GraphLab)

# add_library(factors STATIC
#   binary_factor.cpp
#   discrete_variable.cpp
#   unary_factor.cpp)

subdirs(tests)
================================================
FILE: toolkits/graphical_models/factors/bp_graph_data.h
================================================
/**
 * Software submitted by
 *
Systems & Technology Research / Vision Systems Inc., 2013
 *
 * Approved for public release; distribution is unlimited. [DISTAR Case #21428]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 *
 */

#ifndef VSI_BP_GRAPH_DATA_H
#define VSI_BP_GRAPH_DATA_H

// NOTE(review): the six include targets below were stripped during text
// extraction -- TODO restore from the repository.
#include
#include
#include
#include
#include
#include

#include "table_factor.hpp"

namespace belief_prop {

/**
 * The type of the distributed graph representing the Factor Graph.
 */
// NOTE(review): the template parameter lists here (and throughout this
// file) were stripped during text extraction -- presumably a size
// parameter shared with table_factor/dense_table. TODO confirm.
template class vertex_data;
template class edge_data;

template struct graph_type {
  typedef graphlab::distributed_graph, edge_data > type;
};

// Edge and Vertex data =============================================>

/**
 * The data associated with each variable in the factor graph.
 * Holds the vertex's potential and current belief plus the per-vertex
 * BP tuning knobs read by bp_vertex_program.
 */
template class vertex_data {
  typedef graphlab::table_factor factor_type;

public:
  double DAMPING;        // damping weight used when blending the new outgoing message with the old one
  double BOUND;          // residual threshold: neighbors are only re-signaled when the message residual exceeds it
  double REGULARIZATION; // if > 0, outgoing messages are additionally damped toward a uniform message
  std::string name;      // human-readable label; used only in log output

  // might be nice to be able to use different datatypes, however any
  // such class would be required to implement the same protocols as
  // table_factor (and table_base if it was to use bp_vertex_program)
  factor_type potential; // the node potential (prior)
  factor_type belief;    // the current belief estimate, updated in apply()

  bool isVariable; // is the vertex a variable or a factor

  // Default constructor: nil factors, all knobs zeroed. Needed by the
  // serialization / distributed-graph machinery.
  vertex_data() :
      DAMPING(0.0), BOUND(0.0), REGULARIZATION(0.0), name(""),
      potential(factor_type::nil), belief(factor_type::nil),
      isVariable(false) { }

  vertex_data(const factor_type& potential_,
              const factor_type& belief_,
              bool isVariable_, std::string name_ = "") :
      DAMPING(0.0), BOUND(0.0), REGULARIZATION(0.0), name(name_),
      potential(potential_), belief(belief_), isVariable(isVariable_) { }

  // Deserialize all fields; order must match save() below.
  void load(graphlab::iarchive& arc) {
    arc >> DAMPING;
    arc >> BOUND;
    arc >> REGULARIZATION;
    arc >> name;
    arc >> potential;
    arc >> belief;
    arc >> isVariable;
  }

  // Serialize all fields; order must match load() above.
  void save(graphlab::oarchive& arc) const {
    arc << DAMPING;
    arc << BOUND;
    arc << REGULARIZATION;
    arc << name;
    arc << potential;
    arc << belief;
    arc << isVariable;
  }
}; // End of vertex data

/**
 * The data associated with each edge in the factor graph.
 * Stores four messages: {old, new} x {both travel directions}.
 */
template class edge_data {
  // REVIEW this could be a dense_table<1>, but not sure how operations
  // on a dense_table<16> would work
  typedef graphlab::dense_table msg_type;

  msg_type messages[4];

  // Map (direction, age) to a slot in messages[]: bit 0 encodes the
  // travel direction (whether source_id < target_id), bit 1 encodes
  // new (1) vs. old (0).
  size_t message_idx(size_t source_id, size_t target_id, bool is_new) {
    return size_t(source_id < target_id) + 2 * size_t(is_new);
  }

public:
  edge_data() {
    for(size_t i = 0; i < 4; ++i) {
      messages[i] = msg_type();
    }
  } // end of constructor

  // Initialize all four slots with the same message.
  edge_data(const msg_type& msg) {
    for(size_t i = 0; i < 4; ++i) {
      messages[i] = msg;
    }
  } // end of constructor

  // The current (new) message travelling from source_id to target_id.
  msg_type& message(size_t source_id, size_t target_id) {
    return messages[message_idx(source_id, target_id, true)];
  }

  // The previous-round message travelling from source_id to target_id.
  msg_type& old_message(size_t source_id, size_t target_id) {
    return messages[message_idx(source_id, target_id, false)];
  }

  // Promote the current message to be the old message for that direction.
  void update_old(size_t source_id, size_t target_id) {
    old_message(source_id, target_id) = message(source_id, target_id);
  }

  void save(graphlab::oarchive& arc) const {
    for(size_t i = 0; i < 4; ++i) arc << messages[i];
  }
  void load(graphlab::iarchive& arc) {
    for(size_t i = 0; i < 4; ++i) arc >> messages[i];
  }
}; // End of edge data

} // end of namespace belief_prop

#endif // VSI_BP_GRAPH_DATA_H
================================================
FILE: toolkits/graphical_models/factors/bp_vertex_program.hpp
================================================
/**
 * Software submitted by
 * Systems & Technology Research / Vision Systems Inc., 2013
 *
 * Approved for public release; distribution is unlimited.
 [DISTAR Case #21428]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 *
 */

#ifndef VSI_BP_VERTEX_PROGRAM_HPP
#define VSI_BP_VERTEX_PROGRAM_HPP

/**
 * This file defines the max-sum vertex program for belief propagation.
 *
 * \author Scott Richardson     10/2012
 */

// NOTE(review): the include targets below were stripped during text
// extraction -- TODO restore from the repository.
#include
#include
#include
#include
#include
#include

#include "table_factor.hpp"
#include "bp_graph_data.h"

namespace belief_prop {

/**
 * The gather_type for the vertex program needs to compute *= in place
 * of += so we create a new type which computes *= for +=.
 * (table_factor stores values in log space, so the product is an
 * addition of log values -- see dense_table's class documentation.)
 */
// NOTE(review): template parameter list stripped during extraction.
template class factor_product {
  typedef graphlab::table_factor factor_type;

public:
  factor_type factor; // the running product of gathered factors

  // REVIEW deep copying the factor around could get expensive.
  // after profiling, it seems most of this is either negligible or gets
  // optimized out
  factor_product(const factor_type& factor = factor_type()) :
      factor(factor) { }

  // Gather "sum": actually a factor product over identically-sized tables.
  factor_product& operator+=(const factor_product& other) {
    DCHECK_EQ(factor.table()->numel(), other.factor.table()->numel());
    factor *= other.factor;
    return *this;
  }

  void save(graphlab::oarchive& arc) const { arc << factor; }
  void load(graphlab::iarchive& arc) { arc >> factor; }
}; // end of struct factor product

/**
 * Belief Propagation Vertex Program. As implemented, this "program"
 * performs the max-sum algorithm. GraphLab runs this program at
 * every vertex.
 *
 * \author Scott Richardson
 */
// NOTE(review): the template parameter lists on this class and on the
// types it names (graph_type, factor_product, vertex_data, edge_data,
// table_factor, dense_table) were stripped during text extraction --
// TODO restore from the repository.
template class bp_vertex_program :
    public graphlab::ivertex_program< typename graph_type::type,
                                      factor_product,
                                      graphlab::messages::sum_priority > {
  // unfortunately this is necessary...from C++ Standard 14.6.2/3:
  // "In the definition of a class template or a member of a class template, if a
  // base class of the class template depends on a template-parameter, the base
  // class scope is not examined during unqualified name lookup either at the
  // point of definition of the class template or member or during an instantiation
  // of the class template or member.
  typedef graphlab::ivertex_program< typename graph_type::type,
                                     factor_product,
                                     graphlab::messages::sum_priority > ivertex_program_t;

  // NOTE there is a bug in GCC < 4.7 which prevents these using declarations from
  // compiling (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14258)
  //using typename ivertex_program_t::edge_dir_type;
  //using typename ivertex_program_t::vertex_type;
  //using typename ivertex_program_t::edge_type;
  typedef typename ivertex_program_t::edge_dir_type edge_dir_type;
  typedef typename ivertex_program_t::vertex_type vertex_type;
  typedef typename ivertex_program_t::edge_type edge_type;

  typedef vertex_data vertex_data_t;
  typedef edge_data edge_data_t;
  typedef graphlab::table_factor factor_type;  // vertex_data_t::factor_type
  typedef graphlab::dense_table msg_type;      // edge_data_t::msg_type

public:
  //using typename ivertex_program_t::gather_type;
  //using typename ivertex_program_t::icontext_type;
  typedef typename ivertex_program_t::gather_type gather_type;
  typedef typename ivertex_program_t::icontext_type icontext_type;

public:
  bp_vertex_program() { }

  /**
   * Since we are handling edge direction ourselves, we will use all edges for
   * gather and scatter
   */
  edge_dir_type gather_edges(icontext_type& context,
                             const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  }; // end of gather_edges

  /**
   * Update the old message to be the new message and collect the
   * message value.
   * Returns (by value) the neighbor's message broadcast onto this
   * vertex's belief domain, so messages can be combined by the
   * factor_product gather sum.
   */
  gather_type gather(icontext_type& context, const vertex_type& vertex,
                     edge_type& edge) const {
    // NOTE While gather() technically has a non-const reference to the
    // source and target vertex data through edge.source() and edge.target(),
    // it should not modify them. The data on the edge (accessible through
    // edge.data()) is modifiable however.
    const vertex_data_t& vdata = vertex.data();
    const vertex_type other_vertex = get_other_vertex(edge, vertex);
    const vertex_data_t& ovdata = other_vertex.data();
    edge_data_t& edata = edge.data();
    logstream(LOG_DEBUG) << "in bp_vertex_program::gather(): compute message to '" << vdata.name
        << "' from vertex '" << ovdata.name << "'" << std::endl;

    // Update the old message with the value of the new message. We
    // then receive the old message during gather and then compute the
    // "cavity" during scatter (again using the old message).
    edata.update_old(other_vertex.id(), vertex.id());
    msg_type& msg = edata.old_message(other_vertex.id(), vertex.id());
    logstream(LOG_DEBUG) << "edata=" << msg << std::endl;
    // rep references the mutable per-program cache; it is copied on return
    gather_type& rep = repmat(msg, vdata);
    logstream(LOG_EVERYTHING) << "repmat-ed msg=" << rep.factor << std::endl;

    logstream(LOG_DEBUG) << "END bp_vertex_program::gather()" << std::endl;
    return rep;
  }; // end of gather function

  /**
   * Multiply message product by node potential and update the belief.
   */
  void apply(icontext_type& context, vertex_type& vertex,
             const gather_type& total) {
    // Isolated vertices receive no messages; nothing to do.
    if(vertex.num_in_edges() + vertex.num_out_edges() == 0) return;

    // factor_type knows it is in log space (so it adds)
    vertex_data_t& vdata = vertex.data();
    logstream(LOG_DEBUG) << "in bp_vertex_program::apply(): vertex = '" << vdata.name << "'" << std::endl;
    //vdata.belief = vdata.potential * total.factor;
    vdata.belief = vdata.potential;
    //vdata.belief.table()->copy_onto(*(vdata.potential.table())); // should be faster...
    vdata.belief *= total.factor;
    logstream(LOG_EVERYTHING) << "vdata.potential=" << vdata.potential << std::endl;
    logstream(LOG_EVERYTHING) << "total.factor=" << total.factor << std::endl;
    logstream(LOG_EVERYTHING) << "vdata.belief=vdata.potential * total.factor = "
        << vdata.belief << std::endl;
    if(vdata.isVariable == true) {
      logstream(LOG_INFO) << "belief-prop variable state = '" << vdata.name << "' "
          << vdata.belief << std::endl;
    }
    DCHECK_GT(vdata.belief.table()->numel(), 0);

    // Rescale the belief to ensure numerical stability. (This is
    // essentially normalization in log-space.)
    // REVIEW is this needed to match belief_prop
    //vdata.belief.table()->shift_normalize();
    //logstream(LOG_DEBUG) << "vdata.shift_normalized=" << vdata.belief << std::endl;

    logstream(LOG_DEBUG) << "END bp_vertex_program::apply()" << std::endl;
  }; // end of apply

  /**
   * Since we are handling edge direction ourselves, we will use all edges for
   * gather and scatter
   */
  edge_dir_type scatter_edges(icontext_type& context,
                              const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  }; // end of scatter edges

  /**
   * Compute new message value for each edge.
*/ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_data_t& vdata = vertex.data(); const vertex_type other_vertex = get_other_vertex(edge, vertex); const vertex_data_t& ovdata = other_vertex.data(); edge_data_t& edata = edge.data(); logstream(LOG_DEBUG) << "in bp_vertex_program::scatter(): compute message from '" << vdata.name << "' to vertex '" << ovdata.name << "'" << std::endl; // construct the cavity //factor_type cavity = vdata.belief; factor_type& cavity = init_factor(vdata.belief).factor; // initilizes the cache factor logstream(LOG_EVERYTHING) << "factor=" << cavity << std::endl; const msg_type& incoming_message = edata.old_message(other_vertex.id(), vertex.id()); cavity /= incoming_message; logstream(LOG_DEBUG) << "incoming_message=" << incoming_message << std::endl; logstream(LOG_EVERYTHING) << "cavity=" << cavity << std::endl; //cavity.table()->normalize(); // compute the new outgoing message const msg_type& old_message = edata.old_message(vertex.id(), other_vertex.id()); msg_type& new_message = edata.message(vertex.id(), other_vertex.id()); DCHECK_NE(&new_message, &old_message); // max-product algorithm cavity.table()->MAP(new_message); // sum-product algorithm //cavity.table()->marginalize(new_message); if(vdata.REGULARIZATION > 0.0) { // shift normalize new_message.shift_normalize(); //logstream(LOG_DEBUG) << "normalized=" << new_message << std::endl; // regularize logstream(LOG_DEBUG) << "regularization_value=" << vdata.REGULARIZATION << std::endl; msg_type reg(new_message.domain()); reg.uniform(1.0); new_message.damp(reg, vdata.REGULARIZATION); logstream(LOG_DEBUG) << "regularized=" << new_message << std::endl; } // shift normalize new_message.shift_normalize(); logstream(LOG_DEBUG) << "normalized=" << new_message << std::endl; msg_type raw_message(new_message); // dampen new_message.damp(old_message, vdata.DAMPING); logstream(LOG_DEBUG) << "damped=" << new_message << std::endl; // Compute message 
residual //const double residual = new_message.l1_diff(old_message); // dec: The literature seems to indicate that the l_inf norm is better, so lets try that. const double residual = new_message.linf_diff(old_message); logstream(LOG_INFO) << "residual=" << residual << std::endl; context.clear_gather_cache(other_vertex); // to prevent drift, we may want to only update the new_message // if the residual is greater than the BOUND because we only // signal the neighboring node when this is true. however this // doesnt work as well as expected... logstream(LOG_INFO) << "belief-prop message from '" << vdata.name << "' to vertex '" << ovdata.name << "':" << " raw=" << new_message // NOTE newlines here can break atomicity... << " damped=" << raw_message << std::endl; // Schedule the adjacent vertex if(residual > vdata.BOUND) { context.signal(other_vertex, residual); } // else { // new_message = old_message; // } logstream(LOG_DEBUG) << "END bp_vertex_program::scatter()" << std::endl; }; // end of scatter /** Save the values to a binary archive */ // NOTE no need to serialize the contents of cache, although it is not POD, // so we must serialize something void save(graphlab::oarchive& arc) const { arc << gather_type(); } /** Read the values from a binary archive */ void load(graphlab::iarchive& arc) { arc >> cache; } private: /** * Initilize 'gather_type cache'---a factor that is re-used to avoid costly * data-structure construction. */ // REVIEW not sure if this would work in async mode gather_type& init_factor(const factor_type& other) const { if(cache.factor.table_storage() == factor_type::nil) { cache.factor = other; } else { cache.factor.table()->copy_onto(*(other.table())); } return cache; } /** * Return msg copied (broadcasted) across the domain defined by vdata */ // you cant multiply (or add) two edge-messages because the intersection of // their domains is null (cf. 
// (comment continued) dense_table::operator*() => dense_table::logP(asg) =>
// discrete_assignment::restrict() fails), so i repmat the message to cover the domain
  gather_type& repmat(const msg_type& msg, const vertex_data_t& vdata) const {
    // Re-use the cached factor sized to the vertex's belief domain, zero it
    // (the log-space multiplicative identity), then fold in the message.
    gather_type& ones = init_factor(vdata.belief);
    //ones = vdata.belief;
    ones.factor.table()->zero();
    // NOTE implicit broadcasting
    // NOTE factor_type knows it is in log space (so it adds)
    ones.factor *= msg;
    return ones;
  }

  /**
   * Return the other vertex
   */
  const vertex_type get_other_vertex(edge_type& edge,
                                     const vertex_type& vertex) const {
    return vertex.id() == edge.source().id() ? edge.target() : edge.source();
  }

private:
  // keeping a cache here has very good storage requirements as opposed to in vector_data.
  // NOTE no need to serialize.
  mutable gather_type cache;

}; // end of class bp_vertex_program

} // end of namespace belief_prop

#endif // VSI_BP_VERTEX_PROGRAM_HPP
================================================
FILE: toolkits/graphical_models/factors/dense_table.hpp
================================================
/**
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 *
 *
 */

#ifndef DENSE_TABLE_HPP
#define DENSE_TABLE_HPP

/**
 * This file contains the definitions of some of the basic factor
 * types needed for loopy belief propagation.
* * \author Joseph Gonzalez * \author Scott Richardson 09/2012 * */ // INCLUDES ===================================================================> // Including Standard Libraries #include #include #include #include #include #include #include #include // Random number generation #include #include #include "discrete_variable.hpp" #include "discrete_domain.hpp" #include "discrete_assignment.hpp" #include "fast_discrete_assignment.hpp" #include "table_base.hpp" // Include the macro for the for each operation #include namespace graphlab { /** * An n-D table up to max_dim dimensions. * NOTE this table stores the data in log-space, although this * implementation detail is abstracted away. E.g., operator* actually * adds values in log-space. * NOTE you can use begin()/end() to iterate over all assignments * in the domain. for example, a domain {var1[0,4), var2[0,5)} has two * dimensions, var1 and var2, with four and five labels respectively. * logP()/set_logP() provide access to the underlying data via an * assignment, e.g., [0,3]. the data in the table is serialized * according to the linear indexing of the domain, which is ordered * such that the variable with the lowest id iterates fastest. 
*/ template class dense_table_impl : public table_base { public: typedef discrete_variable variable_type; typedef discrete_domain domain_type; typedef discrete_assignment assignment_type; typedef table_base table_base_t; /** Construct an empty table */ dense_table_impl() { } /** Construct a table over the given domain */ // dom : the domain over which the table is defined dense_table_impl(const domain_type& dom) : _args(dom), _data(dom.size()) { } /** Construct a dense table over the given domain */ dense_table_impl(const std::vector &args) { // Construct the arguments (which will remap the domain) set_domain(domain_type(args)); } /** Construct a dense table over the given domain * dom : the domain over which the table is defined * data : a vector of values serialized according to dom; that is, such * that the variable with the smallest id iterates fastest */ dense_table_impl(const domain_type& dom, const std::vector &data) : _args(dom), _data(dom.size()) { set_data(data); } /** Construct a dense table over the given domain * vars : a vector of variables that compose the domain * data : a vector of values serialized such that the first * variable in vars iterates the fastest * NOTE this is a convenience constructor. the entries in the * vector are re-sorted such that the variable with the smallest * id iterates fastest * REVIEW make these static factory methods: * e.g., static dense_table& table_from_serialized_data(...) */ dense_table_impl(const std::vector& vars, const std::vector& data) { // Construct the arguments (which will remap the domain) set_domain(domain_type(vars)); // create a faux domain with the size of the dimensions ordered correctly. this // is essentially a permute operation. 
domain_type dom; for(size_t i=0; i asgs(asg.begin(), asg.end()); assignment_type fast_asg(vars, asgs); set_logP(fast_asg, data.at(i)); } } /** Construct a unary table factor over the given var */ dense_table_impl(const variable_type &var) { // Construct the arguments (which will remap the domain) set_domain(domain_type(var)); } /** Construct a unary dense table over the given var */ dense_table_impl(const variable_type& var, std::vector& logd) { // Construct the arguments (which will remap the domain) set_domain(domain_type(var)); set_data(logd); } /** Construct a unary dense table over the given var */ dense_table_impl(const variable_type& var, double const* const begin, double const* const end) { // Construct the arguments (which will remap the domain) set_domain(domain_type(var)); set_data(begin, end - begin); } // NOTE currently, implementing the (big) three isnt strictly necessary /** Construct a copy */ dense_table_impl(const dense_table_impl& other) : _args(other._args), _data(other._data) { } /** Destructor */ virtual ~dense_table_impl() { } /** Standard assignment operator */ dense_table_impl& operator=(const dense_table_impl& other) { if(this == &other) return *this; _args = other._args; _data = other._data; return *this; } private: void set_data(const std::vector &data) { DCHECK_EQ(_data.size(), data.size()); DCHECK_EQ(_data.size(), _args.size()); // i need this for copy_onto() //ASSERT_EQ(_args.num_vars(), 1); std::replace_copy_if(data.begin(), data.end(), _data.begin(), isless(APPROX_LOG_ZERO()), APPROX_LOG_ZERO()); //ASSERT_TRUE(is_finite()); } void set_data(const double data[], const size_t n) { DCHECK_EQ(_data.size(), n); DCHECK_EQ(_data.size(), _args.size()); // i need this for copy_onto() //ASSERT_EQ(_args.num_vars(), 1); std::replace_copy_if(data, data+n, _data.begin(), isless(APPROX_LOG_ZERO()), APPROX_LOG_ZERO()); //ASSERT_TRUE(is_finite()); } struct isless { double _val; isless(const double& val) : _val(val) { } bool operator() (double number) 
{ return number < _val; } }; public: using table_base_t::APPROX_LOG_ZERO; // TODO implement operator== and operator!= using table_base_t::copy_onto; dense_table_impl& copy_onto(const dense_table_impl& other) { if(this == &other) return *this; DCHECK_EQ(args(), other.args()); set_data(other._data); return *this; } void set_args(const domain_type& args) { _args = args; _data.resize(args.size()); } void set_domain(const domain_type& args) { set_args(args); } inline const domain_type& args() const { return _args; } inline const domain_type& domain() const { return args(); } // NOTE index is serialized according to the linear indexing of the domain // TODO can i make this private inline const double& logP(const size_t index) const { DCHECK_LT(index, size()); return _data[index]; } const double& logP(const assignment_type& asg) const { if(asg.args() == args()) { // if the assignment index matches const size_t index(asg.linear_index()); DCHECK_LT(index, size()); return _data[index]; } else { // Restrict the assignment to this domain const assignment_type sub_asg = asg.restrict(_args); DCHECK_LT(sub_asg.linear_index(), size()); return _data[sub_asg.linear_index()]; } } private: // clip values to be greater than or equal to APPROX_LOG_ZERO // NOTE not as efficient as exposing the value by reference, but safer. inline void set_logP(const size_t index, const double& val) { DCHECK_LT(index, size()); _data[index] = std::max(val, APPROX_LOG_ZERO()); } public: // NOTE be careful not to replace instances of logP(assignment_type) with // logP(assignment_type.linear_index()) as these are not the same thing // when the domains are different. 
void set_logP(const assignment_type& asg, const double& val) { if(asg.args() == args()) { // if the assignment index matches const size_t index(asg.linear_index()); DCHECK_LT(index, size()); _data[index] = std::max(val, APPROX_LOG_ZERO()); } else { // Restrict the assignment to this domain const assignment_type sub_asg = asg.restrict(_args); DCHECK_LT(sub_asg.linear_index(), size()); _data[sub_asg.linear_index()] = std::max(val, APPROX_LOG_ZERO()); } } // end of logP size_t size() const { DCHECK_EQ(_args.size(), _data.size()); return _args.size(); } virtual size_t numel() const { return size(); } size_t num_vars() const { return _args.num_vars(); } virtual size_t ndims() const { return num_vars(); } virtual void zero() { std::fill(_data.begin(), _data.end(), 0); } void uniform() { std::fill(_data.begin(), _data.end(), log(1.0/size())); } void uniform(double value) { std::fill(_data.begin(), _data.end(), value); } //! ensure that sum_x this(x) = 1 void normalize() { //ASSERT_TRUE(is_finite()); // Compute the max value double max_value = logP(0); for(size_t i = 0; i < size(); ++i) { max_value = std::max(max_value, logP(i) ); } // scale and compute normalizing constant double Z = 0.0; for(size_t i = 0; i < size(); ++i) { double val = logP(i) - max_value; set_logP(i, val); Z += exp(val); } // assert( !std::isinf(Z) ); // assert( !std::isnan(Z) ); // assert( Z > 0.0); const double logZ(log(Z)); DASSERT_FALSE( std::isinf(logZ) ); DASSERT_FALSE( std::isnan(logZ) ); // Normalize for(size_t i = 0; i < size(); ++i) { set_logP( i, logP(i) - logZ ); } //ASSERT_TRUE(is_finite()); } // End of normalize /** * Ensure that the largest value in log form is zero. This * prevents overflows on normalization. 
*/ void shift_normalize() { //ASSERT_TRUE(is_finite()); // Compute the max value double max_value = logP(0); for(size_t i = 0; i < size(); ++i) { max_value = std::max(max_value, logP(i)); } for(size_t i = 0; i < size(); ++i) { set_logP( i, logP(i) - max_value ); } //ASSERT_TRUE(is_finite()); } /** * Return false if any of the entries are not finite */ bool is_finite() const { for(size_t i = 0; i < size(); ++i) { const bool is_inf( std::isinf( logP(i) ) ); const bool is_nan( std::isnan( logP(i) ) ); if( __builtin_expect( is_inf || is_nan, 0) ) return false; } return true; } public: //! this(x) *= other(x); dense_table_impl& operator*=(const dense_table_impl& other) { return for_each_assignment(other, multiplies()); } // //! Create a dense table on the fly // dense_table_impl operator*(const dense_table_impl& other) const { // dense_table_impl tbl = *this; // return tbl *= other; // } //! this(x) += other(x); // supports broadcasting of a sub-domain across the full domain dense_table_impl& operator+=(const dense_table_impl& other) { return for_each_assignment(other, plus()); } //! this(x) /= other(x); // supports broadcasting of a sub-domain across the full domain dense_table_impl& operator/=(const dense_table_impl& other) { return for_each_assignment(other, divides()); } // //! 
Create a dense table on the fly // dense_table_impl operator/(const dense_table_impl& other) const { // dense_table_impl tbl = *this; // return tbl /= other; // } private: // NOTE we assume we are in log space struct divides { double operator()(const double& a, const double& b) const { return a - b; } }; struct multiplies { double operator()(const double& a, const double& b) const { return a + b; } }; struct plus { double operator()(const double& a, const double& b) const { double out = log( exp(a) + exp(b) ); DASSERT_FALSE(std::isinf( out )); DASSERT_FALSE(std::isnan( out )); return out; } }; template inline dense_table_impl& for_each_assignment(const dense_table_impl& other, const Func& f) { //ASSERT_TRUE(is_finite()); if(args() == other.args()) { DCHECK_EQ(size(), other.size()); // More verctorizable version for(size_t i = 0; i < size(); ++i) { double val = f(logP(i), other.logP(i)); set_logP( i, val ); //logP(i) -= other.logP(i); } } else { // other domain must be a subset of this domain DCHECK_EQ((args() + other.args()).num_vars(), num_vars()); typename domain_type::const_iterator asg = args().begin(); typename domain_type::const_iterator end = args().end(); for( ; asg != end; ++asg) { double val = f(logP(asg->linear_index()), other.logP(*asg)); set_logP( asg->linear_index(), val ); } } //ASSERT_TRUE(is_finite()); return *this; } public: // Currently unused //! 
this(x) = sum_y joint(x,y) * other(y) void convolve(const dense_table_impl& joint, const dense_table_impl& other) { // ensure that both tables have the same domain DCHECK_EQ(args() + other.args(), joint.args()); // Initialize the table to zero so we can use it as an accumulator uniform(0); typename domain_type::const_iterator asg = joint.args().begin(); typename domain_type::const_iterator end = joint.args().end(); for( ; asg != end; ++asg) { const double value = exp(joint.logP(asg->linear_index()) + other.logP(*asg)); DASSERT_FALSE(std::isinf( value )); DASSERT_FALSE(std::isnan( value )); // NOTE durring this accumulation, the table is not in log space //logP(*asg) += value; set_logP(*asg, logP(*asg) + value); } for(size_t i = 0; i < size(); ++i) { double sum = logP(i); DCHECK_GE(sum, 0.0); if(sum == 0) { set_logP( i, APPROX_LOG_ZERO() ); } else { set_logP( i, log(sum) ); } } } //! this(x) = other(x, y = asg) void condition(const dense_table_impl& other, const assignment_type& asg) { DCHECK_EQ(args(), other.args() - asg.args()); // create a fast assignment starting from the '0' assignment // of args() and the conditioning assignment of asg fast_discrete_assignment fastyasg(assignment_type(args()) & asg); // transpose the remaining assignments to the start fastyasg.transpose_to_start(args()); typename domain_type::const_iterator xasg = args().begin(); typename domain_type::const_iterator end = args().end(); for( ; xasg != end; ++xasg) { // REVIEW should this be other.logP(fastasg)? since other and fastyasg // dont have the same domain? (would still need to convert fastasg to // a discrete_assignment) set_logP( xasg->linear_index(), other.logP(fastyasg.linear_index()) ); ++fastyasg; } } //! 
this(x) = this(x) other(x, y = asg) void times_condition(const dense_table_impl& other, const assignment_type& asg) { //assert(args() == other.args() - asg.args()); // create a fast assignment starting from the '0' assignment // of args() and the conditioning assignment of asg fast_discrete_assignment fastyasg(assignment_type(args()) & asg); // transpose the remaining assignments to the start fastyasg.transpose_to_start(args()); if(asg.num_vars() == 0) { *this *= other; } else { typename domain_type::const_iterator xasg = args().begin(); typename domain_type::const_iterator end = args().end(); for( ; xasg != end; ++xasg) { // REVIEW should this be other.logP(fastasg)? since other and fastyasg // dont have the same domain? (would still need to convert fastasg to // a discrete_assignment) double val = logP(xasg->linear_index()) + other.logP(fastyasg.linear_index()); set_logP( xasg->linear_index(), val ); ++fastyasg; } } } using table_base_t::marginalize; //! msg(x) = sum_y this(x,y) void marginalize(dense_table_impl& msg) const { // No need to marginalize if(args() == msg.args()) { // Just copy and return msg = *this; return; } // Compute the domain to remove domain_type ydom = args() - msg.args(); DCHECK_GT(ydom.num_vars(), 0); fast_discrete_assignment fastyasg(args()); fastyasg.transpose_to_start(ydom); // count the number of elements in ydom size_t numel = ydom.size(); // Loop over x typename domain_type::const_iterator xasg = msg.args().begin(); typename domain_type::const_iterator end = msg.args().end(); for( ; xasg != end; ++xasg) { double sum = 0; for(size_t i = 0;i < numel; ++i) { sum += exp(logP(fastyasg.linear_index())); ++fastyasg; } DASSERT_FALSE( std::isinf(sum) ); DASSERT_FALSE( std::isnan(sum) ); DCHECK_GE(sum, 0.0); if(sum == 0) msg.set_logP( xasg->linear_index(), APPROX_LOG_ZERO() ); else msg.set_logP( xasg->linear_index(), log(sum) ); } } using table_base_t::MAP; //! 
msg(x) = max_y this(x,y) void MAP(dense_table_impl& msg) const { //ASSERT_TRUE(is_finite()); // No need to marginalize if(args() == msg.args()) { // Just copy and return msg = *this; return; } // Compute the domain to remove domain_type ydom = args() - msg.args(); DCHECK_GT(ydom.num_vars(), 0); fast_discrete_assignment fastyasg(args()); fastyasg.transpose_to_start(ydom); // count the number of elements in ydom size_t numel = ydom.size(); // Loop over x typename domain_type::const_iterator xasg = msg.args().begin(); typename domain_type::const_iterator end = msg.args().end(); for( ; xasg != end; ++xasg) { double maxval = APPROX_LOG_ZERO(); for(size_t i = 0;i < numel; ++i) { maxval = std::max(maxval, logP(fastyasg.linear_index())); ++fastyasg; } msg.set_logP( xasg->linear_index(), maxval ); } //ASSERT_TRUE(is_finite()); } //! This = other * damping + this * (1-damping) void damp(const dense_table_impl& other, const double& damping) { //ASSERT_TRUE(is_finite()); // This table must be over the same domain as the other if(damping == 0) return; DCHECK_EQ(args(), other.args()); DCHECK_GT(damping, 0.0); DCHECK_LT(damping, 1.0); for(size_t i = 0; i < size(); ++i) { double val = damping * exp(other.logP(i)) + (1-damping) * exp(logP(i)); DCHECK_GE(val, 0); if(val == 0) { set_logP( i, APPROX_LOG_ZERO() ); } else { set_logP( i, log(val) ); } DASSERT_FALSE( std::isinf(logP(i)) ); DASSERT_FALSE( std::isnan(logP(i)) ); } //ASSERT_TRUE(is_finite()); } //! compute the l_inf norm (i.e. the max diff) between two tables double linf_diff(const dense_table_impl& other) const { //ASSERT_TRUE(is_finite()); // This table must be over the same domain as the other DCHECK_EQ(args(), other.args()); double max_diff = 0; for(size_t i = 0; i < size(); ++i) { double diff = fabs(exp(other.logP(i)) - exp(logP(i))); if (diff > max_diff) { max_diff = diff; } } //ASSERT_TRUE(is_finite()); return max_diff; } //! 
compute the average l1 norm between two tables double l1_diff(const dense_table_impl& other) const { //ASSERT_TRUE(is_finite()); // This table must be over the same domain as the other DCHECK_EQ(args(), other.args()); double sum = 0; for(size_t i = 0; i < size(); ++i) { sum += fabs(exp(other.logP(i)) - exp(logP(i))); } //ASSERT_TRUE(is_finite()); return sum / size(); // TODO the l1 norm is not normalized } //! compute the l1 norm in log space double l1_logdiff(const dense_table_impl& other) const { DCHECK_EQ(args(), other.args()); double sum = 0; for(size_t i = 0; i < size(); ++i) { sum += fabs(other.logP(i) - logP(i)); } return sum / size(); // TODO the l1 norm is not normalized } //! argmax(): return the assignment of the largest value assignment_type max_asg() const { typename domain_type::iterator max_asg = args().begin(); double max_value = logP(max_asg->linear_index()); typename domain_type::const_iterator asg = args().begin(); typename domain_type::const_iterator end = args().end(); for( ; asg != end; ++asg) { if(logP(asg->linear_index()) > max_value) { max_value = logP(asg->linear_index()); *max_asg = *asg; } } return *max_asg; } //! 
return the linear index of the largest value size_t max_index() const { return max_asg().linear_index(); } /** * Compute the expectation of the dense table */ inline void expectation(std::vector& values) const { values.clear(); values.resize(num_vars(), 0); double sum = 0; typename domain_type::const_iterator asg = args().begin(); typename domain_type::const_iterator end = args().end(); for( ; asg != end; ++asg) { const double scale = exp(logP(asg->linear_index())); sum += scale; typename assignment_type::const_iterator asg_it = asg.begin(); for(size_t i = 0; i < num_vars(); ++i) { values[i] += asg_it[i] * scale; } } // Rescale for normalization for(size_t i = 0; i < num_vars(); ++i) values[i] /= sum; } // end of expectation /** * Draw a sample from the dense table */ inline assignment_type sample() const { DCHECK_GT(size(), 0); // This table must be normalized const double t = graphlab::random::rand01(); DCHECK_GE( t, 0 ); DCHECK_LT( t, 1 ); double sum = 0; for(size_t i = 0; i < size(); ++i) { sum += exp( logP(i) ); if(t <=sum) { return assignment_type(args(), i); } DCHECK_LT(sum, 1); } // Unreachable throw("Invalid state reached in sample()"); return assignment_type(); } // end of sample /** * Construct a binary agreement factor */ void set_as_agreement(const double& lambda) { DCHECK_EQ(num_vars(), 2); typename domain_type::const_iterator asg = args().begin(); typename domain_type::const_iterator end = args().end(); for( ; asg != end; ++asg) { typename assignment_type::const_iterator asg_it = asg->begin(); const int diff = abs( int(asg_it[0]) - int(asg_it[1]) ); if( diff > 0) { set_logP( asg->linear_index(), -lambda ); } else { set_logP( asg->linear_index(), 0 ); } } } // end of set_as_agreement void set_as_laplace(const double& lambda) { DCHECK_EQ(num_vars(), 2); typename domain_type::const_iterator asg = args().begin(); typename domain_type::const_iterator end = args().end(); for( ; asg != end; ++asg) { typename assignment_type::const_iterator asg_it = 
asg->begin(); const int diff = abs( int(asg_it[0]) - int(asg_it[1]) ); set_logP( asg->linear_index(), -diff * lambda ); } } // end of set_as_laplace public: void load(graphlab::iarchive& arc) { arc >> _args; arc >> _data; } void save(graphlab::oarchive& arc) const { arc << _args; arc << _data; } private: //! The domain of the table (the arity of the table, along with its cardinality) domain_type _args; // NOTE _data is ordered according to the linear indexing of our domain, // which is ordered such that the variable with the lowest id iterates // fastest. weird! i know! std::vector _data; }; // End of dense table // REVIEW could move the these methods into dense_table_impl (which // would become dense_table again) like in sparse_table template class dense_table : public dense_table_impl { typedef dense_table_impl dense_table_impl_t; // "using typename" parses but doesn't work in GCC < 4.7. Use "typedef typename" instead. //using typename dense_table_impl_t::table_base_t; //using typename dense_table_impl_t::variable_type; //using typename dense_table_impl_t::domain_type; //using typename dense_table_impl_t::assignment_type; typedef typename dense_table_impl_t::table_base_t table_base_t; typedef typename dense_table_impl_t::variable_type variable_type; typedef typename dense_table_impl_t::domain_type domain_type; typedef typename dense_table_impl_t::assignment_type assignment_type; public: /** Construct an empty dense table */ dense_table() : dense_table_impl_t() { } /** Construct a dense table over the given domain */ dense_table(const domain_type& dom) : dense_table_impl_t(dom) { } /** Construct a dense table over the given domain and distribution */ dense_table(const domain_type& dom, const std::vector &data) : dense_table_impl_t(dom, data) { } /** Construct a dense table over the given domain */ dense_table(const std::vector &args) : dense_table_impl_t(args) { } /** Construct a dense table over the given domain */ dense_table(const std::vector &args, const 
std::vector &data) : dense_table_impl_t(args, data) { } // REVIEW make these static factory methods: // static dense_table& unary_table_node(...) or use the virtual // constructor idiom or something /** Construct a unary dense table over the given var */ dense_table(const variable_type &var) : dense_table_impl_t(var) { } /** Construct a unary dense table over the given var */ dense_table(const variable_type& var, std::vector& logd) : dense_table_impl_t(var, logd) { } /** Construct a unary dense table over the given var */ dense_table(const variable_type& var, double const* const begin, double const* const end) : dense_table_impl_t(var, begin, end) { } /** Construct a copy */ dense_table(const dense_table& other) : dense_table_impl_t(other) { } virtual ~dense_table() { } // REVIEW currently, this isnt necessary /** Standard assignment operator */ dense_table& operator=(const dense_table& other) { if(this == &other) return *this; dense_table_impl_t::operator=(other); return *this; } friend std::ostream& operator<<(std::ostream& out, const dense_table& tbl) { out << "Dense Table: " << tbl.args() << "{" << std::endl; typename domain_type::const_iterator asg = tbl.args().begin(); typename domain_type::const_iterator end = tbl.args().end(); for( ; asg != end; ++asg) { out << "\tLogP(" << *asg << ")=" << tbl.logP(*asg) << std::endl; } out << "}"; // dense_table::const_iterator asg = tbl.args().begin(); // dense_table::const_iterator end = tbl.args().end(); // for( ; asg != end; ++asg) { // out << tbl.logP(*asg) << " "; // } return out; } // virtual methods public: typedef dense_table const *const const_ptr; virtual dense_table& deep_copy(const table_base_t& base) { if(this == &base) return *this; // ensure we are dealing with a dense_table const_ptr other = dynamic_cast(&base); if(other == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } *this = *other; return *this; } using 
dense_table_impl_t::copy_onto; virtual dense_table& copy_onto(const table_base_t& base) { if(this == &base) return *this; // ensure we are dealing with a dense_table const_ptr other = dynamic_cast(&base); if(other == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } dense_table_impl_t::copy_onto(*other); return *this; } virtual const variable_type& var(const size_t index) const { return dense_table_impl_t::domain().var(index); } /* //! this(x) += other(x); virtual dense_table& plus_equals(const table_base_t& base) { // ensure we are dealing with a dense_table const_ptr other = dynamic_cast(&base); if(other == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } *this += *other; return *this; } */ //! this(x) *= other(x); virtual dense_table& times_equals(const table_base_t& base) { // ensure we are dealing with a dense_table const_ptr other = dynamic_cast(&base); if(other == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } *this *= *other; return *this; } //! this(x) /= other(x); virtual dense_table& divide_equals(const table_base_t& base) { // ensure we are dealing with a dense_table const_ptr other = dynamic_cast(&base); if(other == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } *this /= *other; return *this; } //! (out(x) = this(x)) * other(x); virtual void times(const table_base_t& base, table_base_t& out_base) const { // ensure we are dealing with a dense_table dense_table *const out = dynamic_cast(&out_base); if(out == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } *out = *this; // deep copy out->times_equals(base); } //! 
(out(x) = this(x)) / other(x); virtual void divide(const table_base_t& base, table_base_t& out_base) const { // ensure we are dealing with a dense_table dense_table *const out = dynamic_cast(&out_base); if(out == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } *out = *this; // deep copy out->divide_equals(base); } using dense_table_impl_t::marginalize; virtual void marginalize(table_base_t& base) const { // ensure we are dealing with a dense_table dense_table* msg = dynamic_cast(&base); if(msg == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } dense_table_impl_t::marginalize(*msg); } using dense_table_impl_t::MAP; virtual void MAP(table_base_t& base) const { // ensure we are dealing with a dense_table dense_table* msg = dynamic_cast(&base); if(msg == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } dense_table_impl_t::MAP(*msg); } public: virtual void load(graphlab::iarchive& arc) { dense_table_impl_t::load(arc); } virtual void save(graphlab::oarchive& arc) const { dense_table_impl_t::save(arc); } virtual std::ostream& print(std::ostream& out = std::cout) const { // ensure we are dealing with a dense_table const_ptr tbl = dynamic_cast(this); if(tbl == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } out << *tbl; return out; } }; }; // end of namespace graphlab #include #endif // DENSE_TABLE_HPP ================================================ FILE: toolkits/graphical_models/factors/discrete_assignment.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * */ #ifndef DISCRETE_ASSIGNMENT_HPP #define DISCRETE_ASSIGNMENT_HPP #include // Random number generation #include #include "discrete_variable.hpp" #include "discrete_bounds.hpp" #include namespace graphlab { /** * This class respresents a discrete assignment on a domain, i.e, an * index into a domain. E.g., element [3,1] in the domain [0,5) x [0x5). * * \author Joseph Gonzalez * \author Scott Richardson 09/2012 * */ template class discrete_assignment { typedef uint16_t subasg_type; public: typedef subasg_type* iterator; typedef const subasg_type* const_iterator; public: typedef discrete_bounds domain_type; typedef discrete_variable variable_type; //! Construct an empty discrete_assignment discrete_assignment() : _index(0) { } //! Construct a zero discrete_assignment over the domain explicit discrete_assignment(const domain_type& args) : _args(args), _index(0) { for(size_t i = 0; i < args.num_vars(); ++i) _asgs[i] = 0; } //! construct an discrete_assignment from one variable discrete_assignment(const variable_type& v1, size_t asg1) : _args(v1), _index(asg1) { assert(asg1 < v1.size()); _asgs[0] = asg1; } //! construct an discrete_assignment from two variables discrete_assignment(const variable_type& v1, size_t asg1, const variable_type& v2, size_t asg2) : _args(v1, v2), _index(0) { set_asg(v1.id(), asg1); set_asg(v2.id(), asg2); } //! 
construct an discrete_assignment from multiple variables // NOTE each element of asg indexes into the dimension defined by the // corresponding element (variable) of vars discrete_assignment(const std::vector& vars, const std::vector& asg) : _args(domain_type(vars)), _index(0) { assert(vars.size() == asg.size()); // map variables to their assignment in the domain for(size_t j=0; j& asg) : _args(args), _index(0) { for(size_t i = 0; i < _args.num_vars(); ++i) { assert(asg[i] < args.var(i).size()); _asgs[i] = asg[i]; } recompute_linear_index(); } iterator begin() { return &(_asgs[0]); } iterator end() { return &(_asgs[_args.num_vars()]); } const_iterator begin() const { return &(_asgs[0]); } const_iterator end() const { return &(_asgs[_args.num_vars()]); } // //! Construct the union of two discrete_assignments // inline discrete_assignment& operator&=(const discrete_assignment& asg2) { // discrete_assignment asg1 = *this; // const domain_type& dom1 = asg1.args(); // const domain_type& dom2 = asg2.args(); // _args = dom1 + dom2; // _index = 0; // size_t i = 0, j1 = 0, j2 = 0; // for( ; i < _args.num_vars() && // (j1 < dom1.num_vars() || j2 < dom2.num_vars()); // ++i) { // // If the the two discrete_assignments share a same variable // if(j1 < dom1.num_vars() && // _args.var(i) == dom1.var(j1) && // j2 < dom2.num_vars() && // _args.var(i) == dom2.var(j2)) { // // Then they must have the same discrete_assignment // // assert(asg1._asgs[j1] == asg2._asgs[j2]); // _asgs[i] = asg1._asgs[j1]; // ++j1; ++j2; // } else if(j1 < dom1.num_vars() && // _args.var(i) == dom1.var(j1) ) { // _asgs[i] = asg1._asgs[j1]; // ++j1; // } else if(j2 < dom2.num_vars() && // _args.var(i) == dom2.var(j2) ) { // _asgs[i] = asg2._asgs[j2]; // ++j2; // } else { // // Unreachable state // assert(false); // } // } // assert(i == _args.num_vars()); // assert(j1 == dom1.num_vars()); // assert(j2 == dom2.num_vars()); // recompute_linear_index(); // return *this; // } // // Construct the union of two 
discrete_assignments // discrete_assignment operator&(const discrete_assignment& other) const { // discrete_assignment new_asg = *this; // return new_asg &= other; // } //! Construct the union of two discrete_assignments inline discrete_assignment operator&(const discrete_assignment& other) const { discrete_assignment result(args() + other.args()); // Require disjoint discrete_assignments // assert(args().size() + other.args().size() == result.size()); size_t i = 0, j = 0, k = 0; while(i < num_vars() && j < other.num_vars()) { // extra increment if necessary assert(k < result.num_vars()); result._asgs[k] = (result.args().var(k) == args().var(i))? asg_at(i) : other.asg_at(j); // if the variables are the same then the discrete_assignments must // also be the same assert(!(args().var(i) == other.args().var(j)) || (asg_at(i) == other.asg_at(j))); // move indexs i += (args().var(i) == result.args().var(k)); j += (other.args().var(j) == result.args().var(k)); k++; } while(i < num_vars()) result._asgs[k++] = asg_at(i++); while(j < other.num_vars()) result._asgs[k++] = other.asg_at(j++); // recompute the linear index of the result result.recompute_linear_index(); return result; } // Construct the union of two discrete_assignments inline discrete_assignment& operator&=(const discrete_assignment& other) { discrete_assignment tmp = *this & other; *this = tmp; return *this; } //! Get the next discrete_assignment discrete_assignment& operator++() { assert(_index < _args.size()); // Increment the index ++_index; // Update the discrete_assignments for(size_t i = 0; i < _args.num_vars(); ++i) { _asgs[i]= ((_asgs[i] + 1) % _args.var(i).size()); if(_asgs[i] > 0) { return *this; } } // Reached end make_end(); return *this; } //! Make this an ending discrete_assignment const discrete_assignment& make_end() { _index = -1; return *this; // for(size_t i = 0; i < _args.num_vars(); ++i) // _asgs[i] = _args.var(i).size(); } //! 
Uniformly sample a new index value void uniform_sample() { set_index( graphlab::random::fast_uniform(size_t(0), size() - 1) ); } //! get the domain inline const domain_type& args() const { return _args; } //! get the number of variables inline size_t num_vars() const { return _args.num_vars(); } //! get the size of the discrete_assignment inline size_t size() const { return _args.size(); } size_t asg(const variable_type& var) const { return asg(var.id()); } void set_asg(const variable_type& var, size_t value) { set_asg(var.id(), value); } private: size_t asg(size_t var_id) const { size_t index = _args.var_location(var_id); assert(index < _args.num_vars()); return _asgs[index]; } size_t asg_at(size_t index) const { assert(index < _args.num_vars()); return _asgs[index]; } void set_asg(size_t var_id, size_t value, bool recompute=true) { size_t index = _args.var_location(var_id); assert(index < _args.num_vars()); assert(value < _args.var(index).size()); _asgs[index] = value; if(recompute) recompute_linear_index(); } void set_asg_at(size_t index, size_t value, bool recompute=true) { assert(index < _args.num_vars()); assert(value < _args.var(index).size()); _asgs[index] = value; if(recompute) recompute_linear_index(); } public: //! Get the index of this discrete_assignment // NOTE index is serialized according to the linear indexing of the domain inline size_t linear_index() const { return _index; } //! Set the index of this discrete_assignment // NOTE index is serialized according to the linear indexing of the domain void set_index(size_t index) { assert(index < _args.size()); _index = index; recompute_asgs(); } //! Tests whether two discrete_assignments are equal bool operator==(const discrete_assignment& other) const { return _index == other._index; } //! Tests whether two discrete_assignments are not equal bool operator!=(const discrete_assignment& other) const { return !this->operator==(other); } //! 
Tests whether this discrete_assignment is < other bool operator<(const discrete_assignment& other) const { return _index < other._index; } //! Tests whether this discrete_assignment is > other bool operator>(const discrete_assignment& other) const { return other.operator<(*this); } //! Tests whether this discrete_assignment is <= other bool operator<=(const discrete_assignment& other) const { return !this->operator>(other); } //! Tests whether this discrete_assignment is >= other bool operator>=(const discrete_assignment& other) const { return !this->operator<(other); } //! Restrict the discrete_assignment to a discrete_assignment over the sub-domain discrete_assignment restrict(const domain_type& sub_domain) const { // sub_domain must be a subset of this domain DCHECK_EQ((_args + sub_domain).num_vars(), num_vars()); discrete_assignment other_asg(sub_domain); size_t index = 0; // Map the variables // NOTE this depends on the list of variables in both domains being sorted for(size_t i = 0; i < _args.num_vars() && index < sub_domain.num_vars(); ++i) { if(sub_domain.var(index) == _args.var(i)) { other_asg._asgs[index] = _asgs[i]; index++; } } assert(index == sub_domain.num_vars()); // Recompute the index other_asg.recompute_linear_index(); return other_asg; } // end of restrict //! Update the variables in this discrete_assignment with the values in the //! other discrete_assignment void update(const discrete_assignment& other) { // REVIEW should this domain be a subset of the other domain? 
// NOTE this depends on the list of variables in both domains being sorted for(size_t i = 0, j = 0; i < num_vars() && j < other.num_vars(); ) { if(_args.var(i) == other._args.var(j)) { _asgs[i] = other._asgs[j]; i++; j++; } while(i < num_vars() && _args.var(i) < other.args().var(j)) i++; while(j < other.num_vars() && other.args().var(j) < _args.var(i)) j++; } recompute_linear_index(); } void load(graphlab::iarchive& arc) { arc >> _args; arc >> _index; recompute_asgs(); } void save(graphlab::oarchive& arc) const { arc << _args; arc << _index; } friend std::ostream& operator<<(std::ostream& out, const discrete_assignment& asg) { out << "{"; for(size_t i = 0; i < asg.args().num_vars(); ++i) { // TODO the v_varId is redundant. remove it //out << "v_" << asg.args().var(i).id() << "="; out << asg.asg_at(i); if(i < asg.args().num_vars() - 1) out << ", "; } out << "}=" << asg.linear_index(); return out; } private: //! Recompute the index from the discrete_assignment void recompute_linear_index() { size_t multiple = 1; // Clear the index _index = 0; for(size_t i = 0; i < _args.num_vars(); ++i) { _index += multiple * _asgs[i]; // assert(_args.var(i).nasgs > 0); multiple *= _args.var(i).size(); } } //! 
Recompute the discrete_assignments from the index void recompute_asgs() { assert(_index < _args.size()); size_t quotient = _index; for(size_t i = 0; i < _args.num_vars(); ++i) { _asgs[i] = quotient % _args.var(i).size(); quotient /= _args.var(i).size(); // assert(_asgs[i] < _args.var(i).size()); } } // a discrete domain over a set of variables, e.g., {v1, v2} domain_type _args; // an assignment on the domain, e.g., [3,1] (recomputed anytime // _index is updated with recompute_asgs()) subasg_type _asgs[MAX_DIM]; // the linear index of the assignment _asgs (recomputed anytime _asgs is // updated with recompute_linear_index()) uint32_t _index; }; }; // end of namespace graphlab #include #endif // DISCRETE_ASSIGNMENT_HPP ================================================ FILE: toolkits/graphical_models/factors/discrete_bounds.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * */ #ifndef DISCRETE_BOUNDS_HPP #define DISCRETE_BOUNDS_HPP #include #include "discrete_variable.hpp" #include namespace graphlab { /** * This class respresents a discrete domain over a set of variables. */ template class discrete_bounds { public: //! Make an empty domain discrete_bounds() : _num_vars(0) { } //! Make a single variable discrete_bounds discrete_bounds(const discrete_variable& v1) : _num_vars(1) { DCHECK_LE(_num_vars, MAX_DIM); _vars[0] = v1; } //! 
Make a two variable discrete_bounds (variables are stored sorted by id)
  discrete_bounds(const discrete_variable& v1, const discrete_variable& v2) :
    _num_vars(2) {
    DCHECK_LE(_num_vars, MAX_DIM);
    assert(v1 != v2);
    if(v1 < v2) { _vars[0] = v1; _vars[1] = v2; }
    else { _vars[0] = v2; _vars[1] = v1; }
  }

  //! Make a three variable discrete_bounds (variables are stored sorted by
  //! id; all three must be distinct)
  discrete_bounds(const discrete_variable& v1,
                  const discrete_variable& v2,
                  const discrete_variable& v3) :
    _num_vars(3) {
    DCHECK_LE(_num_vars, MAX_DIM);
    DCHECK_NE(v1, v2);
    DCHECK_NE(v2, v3);
    DCHECK_NE(v1, v3);
    // explicit 3-way sort over the six possible orderings
    if(v1 < v2 && v2 < v3) {
      _vars[0] = v1; _vars[1] = v2; _vars[2] = v3;
    } else if( v1 < v3 && v3 < v2) {
      _vars[0] = v1; _vars[1] = v3; _vars[2] = v2;
    } else if( v2 < v1 && v1 < v3) {
      _vars[0] = v2; _vars[1] = v1; _vars[2] = v3;
    } else if( v2 < v3 && v3 < v1) {
      _vars[0] = v2; _vars[1] = v3; _vars[2] = v1;
    } else if( v3 < v1 && v1 < v2) {
      _vars[0] = v3; _vars[1] = v1; _vars[2] = v2;
    } else if( v3 < v2 && v2 < v1) {
      _vars[0] = v3; _vars[1] = v2; _vars[2] = v1;
    } else {
      throw("Invalid Case!");
    }
  }

  //! Make a discrete_bounds from a vector of variables
  // NOTE(review): the vector's element type is presumably discrete_variable
  // -- confirm against the repository source.  Also note the copy loop runs
  // to _num_vars while the sort clamps at MAX_DIM; if vars.size() exceeded
  // MAX_DIM the copy would overrun _vars when DCHECK is compiled out.
  explicit discrete_bounds(const std::vector& vars) :
    _num_vars(vars.size()) {
    DCHECK_LE(_num_vars, MAX_DIM);
    // copy then sort so _vars is ordered by variable id
    for(size_t i = 0; i < _num_vars; ++i) { _vars[i] = vars[i]; }
    std::sort(_vars, _vars + std::min(MAX_DIM, _num_vars) );
  }

  //! Make a discrete_bounds from a set of variables
  // (std::set iterates in sorted order, so no extra sort is needed)
  explicit discrete_bounds(const std::set& vars) :
    _num_vars(vars.size()) {
    DCHECK_LE(_num_vars, MAX_DIM);
    size_t i = 0;
    foreach(const discrete_variable& var, vars) _vars[i++] = var;
  }

  //! copy constructor (delegates to operator=)
  discrete_bounds(const discrete_bounds& other) :
    _num_vars(other._num_vars) {
    *this = other;
  }

  //! virtual so discrete_domain can safely derive from this class
  virtual ~discrete_bounds() { }

  //! assignment operator
  discrete_bounds& operator=(const discrete_bounds& other) {
    if(this == &other) return *this;
    _num_vars = other._num_vars;
    DCHECK_LE(_num_vars, MAX_DIM);
    for(size_t i = 0; i < _num_vars; ++i) {
      _vars[i] = other.var(i);
    }
    return *this;
  }

  //!
test whether two discrete_boundss are equal bool operator==(const discrete_bounds& other) const { if( num_vars() != other.num_vars() ) return false; for(size_t i = 0; i < num_vars(); ++i) { if(var(i) != other.var(i)) return false; } return true; } //! test whether two discrete_boundss are not equal bool operator!=(const discrete_bounds& other) const { return !(*this == other); } //! add the other discrete_bounds to this discrete_bounds discrete_bounds& operator+=(const discrete_variable& var) { if(_vars[_num_vars - 1] < var) { _vars[_num_vars] = var; _num_vars++; return *this; } return operator+=(discrete_bounds(var)); } //! add the discrete_bounds to this discrete_bounds discrete_bounds& operator+=(const discrete_bounds& other) { if(other.num_vars() == 0) return *this; discrete_bounds backup = *this; _num_vars = 0; for(size_t i = 0, j = 0; i < backup.num_vars() || j < other.num_vars(); ) { DCHECK_LE(_num_vars, MAX_DIM); // Both if(i < backup.num_vars() && j < other.num_vars() && _num_vars < MAX_DIM) { if(backup.var(i) < other.var(j)) _vars[_num_vars++] = backup.var(i++); else if(other.var(j) < backup.var(i)) _vars[_num_vars++] = other.var(j++); else { _vars[_num_vars++] = backup.var(i++); j++; } } else if(i < backup.num_vars() && _num_vars < MAX_DIM) { _vars[_num_vars++] = backup.var(i++); } else if(j < other.num_vars() && _num_vars < MAX_DIM) { _vars[_num_vars++] = other.var(j++); } else { *this = backup; // Unreachable throw("Unreachable case in domain operator+="); } } return *this; } //! add two discrete_boundss together discrete_bounds operator+(const discrete_variable& var) const { discrete_bounds dom = *this; return dom += var; } //! add the other discrete_bounds to this discrete_bounds discrete_bounds operator+(const discrete_bounds& other) const { discrete_bounds dom = *this; return dom += other; } //! 
subtract the other discrete_bounds from this discrete_bounds
  //! (sorted-set difference: removes every variable that also appears in
  //! other, preserving the sorted order of the rest)
  discrete_bounds& operator-=(const discrete_bounds& other) {
    if(other.num_vars() == 0) return *this;
    size_t tmp_num_vars = 0;
    for(size_t i = 0, j = 0; i < _num_vars; ++i ) {
      // advance the other index past every id smaller than ours
      for( ; j < other._num_vars && _vars[i].id() > other._vars[j].id();
           ++j) { }
      // keep _vars[i] only if it has no match in other
      if(!(j < other._num_vars && _vars[i].id() == other._vars[j].id())) {
        _vars[tmp_num_vars++] = _vars[i];
      }
    }
    _num_vars = tmp_num_vars;
    return *this;
  }

  //! subtract the other discrete_bounds from this discrete_bounds
  discrete_bounds operator-(const discrete_bounds& other) const {
    discrete_bounds dom(*this);
    return dom -= other;
  }

  //! sorted-set intersection of two domains
  discrete_bounds intersect(const discrete_bounds& other) const {
    discrete_bounds new_dom;
    new_dom._num_vars = 0;
    for(size_t i = 0, j = 0; i < num_vars() && j < other.num_vars(); ) {
      if(_vars[i] == other._vars[j]) {
        // new discrete_bounds gets the variable
        new_dom._vars[new_dom.num_vars()] = _vars[i];
        // Everyone advances
        new_dom._num_vars++;
        i++; j++;
      } else {
        // otherwise increment one of the variables
        if(_vars[i] < other._vars[j]) i++;
        else j++;
      }
    }
    return new_dom;
  }

  //! Get the number of variables
  inline size_t num_vars() const { return _num_vars; }

  //! Get the ith variable
  inline const discrete_variable& var(size_t index) const {
    DCHECK_LT(index, _num_vars);
    return _vars[index];
  }

  /** get the index of the variable or returns number of variables if the
      index is not found */
  size_t var_location(discrete_variable var) const {
    return var_location(var.id());
  }

  //! linear scan for the position of var_id; returns _num_vars when the
  //! variable is absent (the loop stops as soon as a location is found)
  size_t var_location(size_t var_id) const {
    size_t location = _num_vars;
    for(size_t i = 0; i < _num_vars && !(location < _num_vars); ++i) {
      if(_vars[i].id() == var_id) location = i;
    }
    return location;
  }

  // get the index within our domain of each variable in other.
// other must be a subset std::vector vars_location(const discrete_bounds& other) const { // ensure that the other domain is a subset of our domain DCHECK_EQ((*this + other).num_vars(), num_vars()); std::vector locations(other.num_vars()); // NOTE this depends on the list of variables in both domains being sorted size_t subdomain_idx = 0; for(size_t i=0; i vars_location(const std::vector& other) const { // ensure that the other domain is a subset of our domain DCHECK_EQ((*this + discrete_bounds(other)).num_vars(), num_vars()); std::vector locations(other.size()); for(size_t i=0; i 0) { sum = 1; for(size_t i = 0; i < num_vars(); ++i) { // ensure variables to be sorted order if(i > 0) { DCHECK_LT( _vars[ i-1], _vars[i] ); } // and have positive arity DCHECK_GT(_vars[i].size(), 0); sum *= _vars[i].size(); } } return sum; } void load(graphlab::iarchive& arc) { arc >> _num_vars; DCHECK_LE(_num_vars, MAX_DIM); for(size_t i = 0; i < _num_vars; ++i) arc >> _vars[i]; } void save(graphlab::oarchive& arc) const { arc << _num_vars; for(size_t i = 0; i < _num_vars; ++i) arc << _vars[i]; } private: //mutable size_t _size_cached; //mutable size_t _last_num_vars; size_t _num_vars; // REVIEW C style array is space inefficient. a vector might be // better. it would have to be resized if the domain was modified; // however, it would remove the need for the template parameter. // i tried this (37fef6f16b6d). 
creating the vector on the heap is // far too slow discrete_variable _vars[MAX_DIM]; }; template std::ostream& operator<<(std::ostream& out, const discrete_bounds& dom) { out << "{"; for(size_t i = 0; i < dom.num_vars(); ++i) { out << dom.var(i) << "[0:" << dom.var(i).size()-1 << "]"; if( i < dom.num_vars()-1 ) out << ", "; } return out << "} "; } }; // end of namespace graphlab #include #endif // DISCRETE_BOUNDS_HPP ================================================ FILE: toolkits/graphical_models/factors/discrete_domain.hpp ================================================ /** * Software submitted by * Systems & Technology Research / Vision Systems Inc., 2013 * * Approved for public release; distribution is unlimited. [DISTAR Case #21428] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef DISCRETE_DOMAIN_HPP #define DISCRETE_DOMAIN_HPP #include #include "discrete_variable.hpp" #include "discrete_bounds.hpp" #include "discrete_assignment.hpp" #include namespace graphlab { /** * This class respresents a discrete domain over a set of variables. * * \author Scott Richardson 4/2013 */ template class discrete_domain : public discrete_bounds { typedef discrete_bounds bounds_type; typedef discrete_assignment assignment_type; public: //! Make an empty domain discrete_domain() : bounds_type() { } //! Make a single variable discrete_domain discrete_domain(const discrete_variable& v1) : bounds_type(v1) { } //! 
Make a two variable discrete_domain discrete_domain(const discrete_variable& v1, const discrete_variable& v2) : bounds_type(v1, v2) { } //! Make a three variable discrete_domain discrete_domain(const discrete_variable& v1, const discrete_variable& v2, const discrete_variable& v3) : bounds_type(v1, v2, v3) { } //! Make a discrete_domain from a vector of variables explicit discrete_domain(const std::vector& variables) : bounds_type(variables) { } //! Make a discrete_domain from a set of variables explicit discrete_domain(const std::set& variables) : bounds_type(variables) { } discrete_domain(const discrete_domain& other) : bounds_type(other) { } discrete_domain(const bounds_type& other) : bounds_type(other) { } virtual ~discrete_domain() { } /** Standard assignment operator */ discrete_domain& operator=(const discrete_domain& other) { if(this == &other) return *this; bounds_type::operator=(other); return *this; } class ConstIterator; // Iterators // from http://www.oreillynet.com/pub/a/network/2005/11/21/what-is-iterator-in-c-plus-plus-part2.html // and http://www.cs.helsinki.fi/u/tpkarkka/alglib/k06/lectures/Iterators.html // although i think i should use this reference: // http://www.drdobbs.com/the-standard-librarian-defining-iterato/184401331?pgno=3 class Iterator : public std::iterator { private: friend class ConstIterator; typedef discrete_domain domain_type; typedef std::iterator iterator_t; // "using typename" doesn't actually work in GCC < 4.7, which we don't have installed // everywhere. The "typedef typename" construct seems to, so stick with that for now. 
//using typename iterator_t::value_type; //using typename iterator_t::reference; //using typename iterator_t::pointer; typedef typename iterator_t::value_type value_type_custom; typedef typename iterator_t::reference reference_custom; typedef typename iterator_t::pointer pointer_custom; assignment_type _asg; public: explicit Iterator(const domain_type& dom) { // initilize a new assignment _asg = assignment_type(dom); } explicit Iterator(const assignment_type& asg) : _asg(asg) { } Iterator& operator=(const Iterator& other) { if(this == &other) return *this; _asg = other._asg; return *this; } // implicit copy constructor, copy assignment and destructor bool operator==(const Iterator& other) const { return _asg == other._asg; } bool operator!=(const Iterator& other) const { return !(*this == other); } reference_custom operator*() { return _asg; } pointer_custom operator->() { // this may be more correct, but less clear //return &*(domain_type::Iterator)*this; return &_asg; } Iterator& operator++() { ++_asg; return *this; } Iterator operator++(int) { Iterator orig = *this; ++(*this); return orig; } }; class ConstIterator : public std::iterator { typedef discrete_domain domain_type; typedef std::iterator const_iterator_t; //using typename const_iterator_t::value_type; //using typename const_iterator_t::reference; //using typename const_iterator_t::pointer; typedef typename const_iterator_t::value_type value_type_custom; typedef typename const_iterator_t::reference reference_custom; typedef typename const_iterator_t::pointer pointer_custom; assignment_type _asg; public: explicit ConstIterator(const domain_type& dom) { // initilize a new assignment _asg = assignment_type(dom); } explicit ConstIterator(const assignment_type& asg) : _asg(asg) { } ConstIterator(const Iterator& other) : _asg(other._asg) { } // implicit copy constructor and destructor ConstIterator& operator=(const ConstIterator& other) { if(this == &other) return *this; _asg = other._asg; return *this; } bool 
operator==(const ConstIterator& other) const { return _asg == other._asg; } bool operator!=(const ConstIterator& other) const { return !(*this == other); } reference_custom operator*() const { return _asg; } pointer_custom operator->() const { return &_asg; } ConstIterator& operator++() { ++_asg; return *this; } ConstIterator operator++(int) { ConstIterator orig = *this; ++(*this); return orig; } }; Iterator begin() const { return Iterator(*this); } Iterator end() const { Iterator ret(*this); ret->make_end(); return ret; } public: typedef Iterator iterator; typedef ConstIterator const_iterator; }; }; // end of namespace graphlab #include #endif // DISCRETE_DOMAIN_HPP ================================================ FILE: toolkits/graphical_models/factors/discrete_variable.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef DISCRETE_VARIABLE_HPP #define DISCRETE_VARIABLE_HPP /** * This file contains the definitions of a simple discrete variable. * * \author Joseph Gonzalez */ #include #include #include #include // Include the macro for the for each operation //#include namespace graphlab { /** represents a discrete variable */ class discrete_variable { public: //! The type used to store the variable id typedef uint32_t id_type; //! 
the type used to index the variable assignments
  typedef uint32_t index_type;

  //! default-construct a variable with id 0 and zero assignments
  discrete_variable() : id_(0), nasgs_(0) { }

  /** construct a discrete variable with a given id and number of
      assignments (its arity) */
  discrete_variable(id_type id, index_type nasgs) :
    id_(id), nasgs_(nasgs) { }

  //! copy constructor
  discrete_variable(const discrete_variable& other) :
    id_(other.id_), nasgs_(other.nasgs_) { }

  //! assignment operator
  discrete_variable& operator=(const discrete_variable& other) {
    if(this == &other) return *this;
    id_ = other.id_;
    nasgs_ = other.nasgs_;
    return *this;
  }

  //! get the variable id (mutable reference)
  inline id_type& id() { return id_; }
  //! get the variable id
  inline const id_type& id() const { return id_; }

  //! get the number of assignments the variable can take (mutable reference)
  inline index_type& size() { return nasgs_; }
  //! get the number of assignments the variable can take
  inline const index_type& size() const { return nasgs_; }

  //! Compare two variables (ordering is by id only; arity is ignored)
  inline bool operator<(const discrete_variable& other) const {
    return id_ < other.id_;
  }
  //! test equality between two variables (by id only)
  inline bool operator==(const discrete_variable& other) const {
    return id_ == other.id_;
  }
  //! Test inequality between two variables (by id only)
  inline bool operator!=(const discrete_variable& other) const {
    return id_ != other.id_;
  }

  //! load the variable from an archive
  void load(graphlab::iarchive& arc) { arc >> id_ >> nasgs_; }
  //! save the variable to an archive
  void save(graphlab::oarchive& arc) const { arc << id_ << nasgs_; }

 private:
  //! The variable id
  id_type id_;
  //!
The number of assignments the variable takes index_type nasgs_; }; inline std::ostream& operator<<(std::ostream& out, const graphlab::discrete_variable& var) { return out << "v_" << var.id(); // << " in {0:" << var.size()-1 << "}"; } }; // end of namespace graphlab //#include #endif ================================================ FILE: toolkits/graphical_models/factors/factor_graph.hpp ================================================ /** * Software submitted by * Systems & Technology Research / Vision Systems Inc., 2013 * * Approved for public release; distribution is unlimited. [DISTAR Case #21428] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef VSI_FACTOR_GRAPH_HPP #define VSI_FACTOR_GRAPH_HPP #include #include #include #include #include #include //#include #include #include #include #include "table_factor.hpp" #include "bp_graph_data.h" //#include namespace belief_prop { /** * Defines a factor_graph; i.e., a bipartite graph whose verticies can * be divide into two disjoint sets: a set of variables V and a set of * factors F. Undirected edges connect factors to variables. An edge * exists between f and v if v is a member of the factor's domain. * * A variable specifies a unary discrete probabiltiy mass function over * a set of labels. A variable's PMF is assumed to be defined by a * dense_table in which each label has a corresponding probability. * * A factor specifies a discrete joint probability mass function over * a set of variables. 
The n-D PMF is defined by either a dense_table * or a sparse_table. The values in the table are stored such that the * first variable added to the domain iterates the fastest. * * The typical usasge of this interface would consist of 1) adding a set * of variables by using add_variable(), 2) defining the prior distribution * over each variable by using one of the prior related methods (such as * set_prior_for_variable() ), 3) adding a set of factors by using * add_factor(), 4) constructing the distributed graph using make_bp_graph(), * 5) running belief propagation on the distributed graph to propagate the * evidence across the graph (outside the scope of this interface), and * 6) loading the results using pull_beliefs_for_variables(). * * The resulting belief for a variable can be queried using * belief_for_variable().logP() (once the evidence has been propagated * across the distributed graph and the results pulled back into the * factor_graph using pull_beliefs_for_variables() ). * * \author Scott Richardson 10/2012 * */ // NOTE a variable has a domain that spans only itself. a factor has // a domain that spans its neighboring variables template class factor_graph { typedef typename graph_type::type graph_type_t; typedef vertex_data vertex_data_t; typedef edge_data edge_data_t; // dense_table is used to define the distributions over variables. 
// *this shouldnt need to know specifically about sparse_table typedef graphlab::dense_table dense_table_t; typedef graphlab::table_base table_base_t; typedef graphlab::table_factor factor_type; typedef graphlab::discrete_variable variable_type; // variable related methods public: factor_graph() : _unique_var_id(0) { } public: /** Add a new discrete variable to the factor graph */ // TODO rename to create_variable() variable_type add_variable(const size_t n_labels, const std::string& default_var_name="") { size_t id = _unique_var_id++; // assert that the id can be used to index into _factors DCHECK_EQ(id, num_factors()); // Only store the variable in the local variable map if the variable (key) // does not exist in the graph (map) already // var_map_const_iter_type it = _var_map.find(var_name); // if(it != _var_map.end()) { // std::cout << "WARNING: variable already exists for that name" << std::endl; // return it->second; // } // Create a new variable variable_type variable(id, n_labels); std::string var_name(default_var_name); if(var_name.empty()) { std::stringstream ss; ss << id; var_name = ss.str(); } // Save the factor to the factor graph vertex_data_t vert = add_vertex(variable, var_name); logstream(LOG_INFO) << "var_id=" << id << " description='" << vert.name << "'" << std::endl; return variable; } /** * Direct access to the variable's belief distribution. * useful for initialization */ dense_table_t& belief_for_variable(const variable_type& var) { DCHECK_LT(var.id(), num_factors()); // NOTE the variable's prior distribution is always dense dense_table_t* belief = dynamic_cast(factors()[var.id()].belief.table()); DCHECK_NE(belief, NULL); return *belief; } dense_table_t& belief_for_var(const variable_type& var) { return belief_for_variable(var); } /** * Direct access to the variable's belief distribution. 
*/ const dense_table_t& belief_for_variable(const variable_type& var) const { DCHECK_LT(var.id(), num_factors()); // NOTE the variable's prior distribution is always dense dense_table_t const *const belief = dynamic_cast( factors()[var.id()].belief.table()); DCHECK_NE(belief, NULL); return *belief; } const dense_table_t& belief_for_var(const variable_type& var) const { return belief_for_variable(var); } /** * Direct access to the variable's prior distribution. * useful for initialization */ dense_table_t& prior_for_variable(const variable_type& var) { DCHECK_LT(var.id(), num_factors()); // NOTE the variable's prior distribution is always dense dense_table_t* potential = dynamic_cast(factors()[var.id()].potential.table()); DCHECK_NE(potential, NULL); return *potential; } /** * Direct access to the variable's prior distribution. */ const dense_table_t& prior_for_variable(const variable_type& var) const { DCHECK_LT(var.id(), num_factors()); // NOTE the variable's prior distribution is always dense dense_table_t const *const potential = dynamic_cast(factors()[var.id()].potential.table()); DCHECK_NE(potential, NULL); return *potential; } // TODO rename to stage_prior_for_variable, etc. 
void set_prior_for_variable(const variable_type& var, const std::vector& data) { DCHECK_LT(var.id(), num_factors()); factor_type& potential = factors()[var.id()].potential; // NOTE the variable's prior distribution is always dense dense_table_t* table = dynamic_cast(potential.table()); DCHECK_NE(table, NULL); *table = dense_table_t(table->domain(), data); } void set_belief_for_variable(const variable_type& var, const std::vector& data) { DCHECK_LT(var.id(), num_factors()); factor_type& belief = factors()[var.id()].belief; // NOTE the variable's prior distribution is always dense dense_table_t* table = dynamic_cast(belief.table()); DCHECK_NE(table, NULL); *table = dense_table_t(table->domain(), data); } void set_prior_for_variable(const variable_type& var, const dense_table_t& table) { DCHECK_LT(var.id(), num_factors()); factors()[var.id()].potential = factor_type(factor_type::DENSE_TABLE, table); } void set_belief_for_variable(const variable_type& var, const dense_table_t& table) { DCHECK_LT(var.id(), num_factors()); factors()[var.id()].belief = factor_type(factor_type::DENSE_TABLE, table); } variable_type get_variable(const size_t id) { DCHECK_LT(id, num_factors()); DCHECK_EQ(factors()[id].potential.table()->ndims(), 1); return factors()[id].potential.table()->var(0); } private: // REVIEW prevent a variable from being double added const vertex_data_t& add_vertex(const variable_type &variable, const std::string& var_name) { // assert that the id can be used to index into _factors DCHECK_EQ(variable.id(), num_factors()); // Define a unary factor (the concept not the class) over the var to // support a prior. 
// NOTE the variable's prior distribution is always dense factor_type prior(factor_type::DENSE_TABLE, dense_table_t(variable)); // using shift normalization, this is equivalent to uniform() prior.table()->zero(); // NOTE the variable's prior distribution is always dense factor_type belief(factor_type::DENSE_TABLE, dense_table_t(variable)); // using shift normalization, this is equivalent to uniform() belief.table()->zero(); // assert that prior and belief have the same domain //ASSERT_TRUE(belief.domain() == prior.domain()); // catalog the variable and associated metadata vertex_data_t vertex(prior, belief, true, var_name); _factors.push_back(vertex); //ASSERT_EQ(_factors[variable.id()].belief, vertex.belief); //ASSERT_EQ(_factors[variable.id()].potential, vertex.potential); return _factors.back(); } factor_type uniform_factor_from_factor(const factor_type& factor) { factor_type belief = factor; // using shift normalization, this is equivalent to uniform() belief.table()->zero(); return belief; } // factor related methods public: /** Add a new discrete factor to the factor graph */ // REVIEW prevent a factor from being double added void add_factor(const table_base_t& factor, const std::string& default_factor_name = "") { size_t id = _unique_var_id++; DCHECK_NE(factor.ndims(), 0); // assert that the id can be used to index into _factors DCHECK_EQ(id, num_factors()); std::string factor_name; if(default_factor_name.empty()) { std::stringstream ss; ss << id; factor_name = ss.str(); } else { factor_name = default_factor_name; } // assert all variables have already been added to the graph logstream(LOG_INFO) << "ndims=" << factor.ndims() << " id=" << id << " description='" << factor_name << "'" << std::endl; for(size_t i = 0; i < factor.ndims(); ++i) { logstream(LOG_INFO) << " factor.var(" << i << ").id()=" << factor.var(i).id() << std::endl; DCHECK_LT(factor.var(i).id(), num_factors()); DCHECK_EQ(factor.var(i).id(), get_variable(factor.var(i).id()).id()); } factor_type 
node(factor); factor_type uniform = uniform_factor_from_factor(node); vertex_data_t vertex(node, uniform, false, factor_name); _factors.push_back(vertex); //ASSERT_EQ(_factors[id], vertex); } // utils public: size_t num_factors() const { return factors().size(); } // FIXME O(n) size_t num_variables() const { size_t ndims = 0; for(typename std::vector::const_iterator factor = factors().begin(); factor != factors().end(); ++factor) { // any vertex that has a domain with only a single dimension is a variable ... if(factor->potential.table()->ndims() == 1) { ndims++; } } return ndims; } const std::string& name(const size_t id) const { DCHECK_LT(id, num_factors()); return factors()[id].name; } /** * write a dot file which can be loaded into graphviz */ void save_graph_summary(const std::string& filename) { DCHECK_EQ(_unique_var_id, num_factors()); std::ofstream fout(filename.c_str()); if(fout.is_open() == false) { std::cerr << "ERROR: " << filename << " not opened." << std::endl; return; } fout << "graph G {" << std::endl; fout << "layout=sfdp;" << std::endl; fout << "overlap=false;" << std::endl; //fout << "sccmap;" << std::endl; fout << "K=2;" << std::endl; //fout << "clusterrank=local;" << std::endl; // Iterate all the factors and all the edges. 
NOTE all variables are also factors typename std::vector::const_iterator factor; size_t factor_idx = 0; for(factor = factors().begin(); factor != factors().end(); ++factor_idx, ++factor) { //fout << "subgraph cluster_" << factor_idx << " {" << std::endl; //fout << "color=none;" << std::endl; // Iterate edges for a factor for(size_t i = 0; i < factor->potential.table()->ndims(); ++i) { variable_type variable = factor->potential.table()->var(i); // all variables are also factors, dont link a variable to itself if(variable.id() == factor_idx) continue; fout << "\"" << factor->name << " {" << factor_idx << "}" << "\" -- \"" << name(variable.id()) << "{" << variable.id() << "}" << "\";" << std::endl; } //fout << "}" << std::endl; } fout << "}" << std::endl; } // end of save_graph_summary /** * Construct a belief propagation graph from a factor graph */ // NOTE could rewrite this function to construct the graph in parallel using // the graphlab::distributed_control obj // TODO rename to finalize_distributed_graph() // REVIEW because bound, damping and regularization are now an attribute of a // factor, perhaps they should be set in add_factor() void make_bp_graph(graph_type_t& graph, double bound, double damping, double regularization=0.0) { DCHECK_NE(num_factors(), 0); DCHECK_EQ(_unique_var_id, num_factors()); // TODO clear the graph graphlab::timer timer; if (graph.dc().procid() == 0) { // Add all the factors and all the edges. NOTE all variables are also factors typename std::vector::iterator factor = factors().begin(); typename std::vector::const_iterator end = factors().end(); size_t factor_idx = 0; for( ; factor != end; ++factor_idx, ++factor) { // Add the factor to the graph factor->BOUND = bound; factor->DAMPING = damping; factor->REGULARIZATION = regularization; graph.add_vertex(factor_idx, *factor); // TODO does the order in which i add variables and factors matter? 
// Attach all the edges for(size_t i = 0; i < factor->potential.table()->ndims(); ++i) { variable_type variable = factor->potential.table()->var(i); // all variables are also factors, dont link a variable to itself if(variable.id() == factor_idx) continue; // NOTE from graph::add_edge() - An edge can only be added if both the // source and target vertex id's are already in the graph. Duplicate // edges are not supported and may result in undefined behavior // NOTE messages are always dense dense_table_t msg(variable); msg.zero(); // using shift normalization, this is equivalent to uniform() graph.add_edge(factor_idx, variable.id(), edge_data_t(msg)); } } graph.dc().cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; } timer.start(); graph.finalize(); graph.dc().cout() << "Finalizing graph. Finished in " << timer.current_time() << std::endl; graph.dc().cout() << "================ " << "Graph statistics on proc " << graph.dc().procid() << " of " << graph.dc().numprocs() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; } // end of make_bp_graph /** * Update the value of the variables' beliefs given the propegated * graphlab distributed-graph. 
*/ // TODO rename to pull_beliefs void pull_beliefs_for_variables(graph_type_t& graph) { typedef dense_table_t const *const const_ptr; // aggregate (reduce) the variable verticies into a vector aggregate_vertex_data agg = graph.template map_reduce_vertices( aggregate_vertex_data() ); // wow for(unsigned i=0; i < agg.size(); ++i) { const vertex_data_t& ovdata = agg.agg[i]; DCHECK_EQ(ovdata.belief.table()->ndims(), 1); const_ptr other = dynamic_cast(ovdata.belief.table()); if(other == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } vertex_data_t& vdata = factors()[other->args().var(0).id()]; DCHECK_EQ(vdata.belief.table()->ndims(), 1); const_ptr tbl = dynamic_cast(vdata.belief.table()); if(tbl == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } ASSERT_EQ(tbl->args().var(0).id(), other->args().var(0).id()); vdata.belief = ovdata.belief; } } private: struct aggregate_vertex_data { std::vector agg; aggregate_vertex_data() { } aggregate_vertex_data operator()(const typename graph_type_t::vertex_type& vertex) { const vertex_data_t& ovdata = vertex.data(); aggregate_vertex_data out; // variables are always dense and 1D, although not all 1D vertices are variables... 
if(ovdata.isVariable == true) { out.agg.push_back(ovdata); } return out; } aggregate_vertex_data& operator+=(const aggregate_vertex_data& other) { agg.insert(agg.end(), other.agg.begin(), other.agg.end()); return *this; } unsigned size() { return agg.size(); } void save(graphlab::oarchive& arc) const { arc << agg; } void load(graphlab::iarchive& arc) { arc >> agg; } }; public: void print_variable(const variable_type& var, const std::vector& labels, std::ostream& out = std::cout) { const dense_table_t& table = belief_for_variable(var); DCHECK_EQ(table.size(), labels.size()); //std::ios::fmtflags f(out.flags()); // i cant believe this is how you do this out << "var_" << var.id() << ": " << factors()[var.id()].name << std::endl; out << std::setw(8) << "index" << std::setw(16) << "logP" << std::setw(16) << "label" << std::endl; size_t end = table.size(); for(size_t i=0; i < end; ++i) { out << std::setw(8) << i << std::setw(16) << table.logP(i) << std::setw(16) << labels[i] << std::endl; } //out.flags(f); } // accessors private: const std::vector& factors() const { return _factors; } std::vector& factors() { return _factors; } private: // NOTE if ever multithreaded, this requires atomic access size_t _unique_var_id; // REVIEW deep-copying data into std::vectors is slow std::vector _factors; }; } // end of namespace belief_prop //#include #endif // VSI_FACTOR_GRAPH_HPP ================================================ FILE: toolkits/graphical_models/factors/factor_graphs.dox ================================================ /** \page factor_graphs Factor Graph Toolkit \brief The Factor Graph toolkit (defined in toolkits/graphical_models/factors/factor_graph.hpp) is an abstraction layer which is able to translate a factor graph into a graphlab distributed-graph. \image html factor_graph_to_distributed_graph.png A factor graph is a bipartite graph composed of two types of vertices: variable nodes and factor nodes. 
A variable specifies a unary discrete probability mass function (PMF) over a set of labels.
) -# adding a set of factors to the graph by using add_factor(...), -# constructing the distributed graph using make_bp_graph(), -# running belief propagation on the distributed graph to propagate the evidence across the graph (outside the scope of this interface), and -# loading the results using pull_beliefs_for_variables(...). The resulting belief for a variable can be queried using belief_for_variable(var).logP() (i.e., once the evidence has been propagated across the distributed graph and the results pulled back into the factor_graph using pull_beliefs_for_variables() ). \section factor_graph_structured_prediction A Factor Graph for Structured Prediction We have implemented the \ref structured_prediction "Structured Prediction" example using a factor graph. We can run the structured prediction application on the synthetic image like this: \verbatim > ./denoise --damping=.3 \endverbatim The structure prediction application applies the Loopy Belief propagation algorithm to a factor graph encoding encoding the classic Potts Model. Once the application terminates, the final predictions will be stored in denoised.png. \image html denoised.jpeg Not bad! Given the synthetic noisy image (noisy_img.png), denoised.png is very similar to the true underlying image (orig_img.png) that we would like to recover. \subsection structured_predictions_options Options \li --help Display the help message describing the list of options. \li --damping (Optional, Default 0.1) The amount of damping to use. Damping can help ensure that the algorithm converges. Larger damping values lead to slower but more reliable convergence. \li --engine (Optional, Default: asynchronous) The engine type to use when executing the vertex-programs - synchronous: All LoopyBP updates are run at the same time (Synchronous BP). This engine exposes greater parallelism but is less computationally efficient. - asynchronous: LoopyBP updates are run asynchronous with priorities (Residual BP). 
This engine has greater overhead and exposes less parallelism but can substantially improve the rate of convergence.
Factor cbj is a 2D dense_table with a domain that spans the cross product of domains' the two variables.) The nlog belief values in the table for this example are arbitrary. You can set them to whatever your discrete probability mass function is using one of dense_table::logP(size_t), dense_table::logP(discrete_assignment), or one of the specialized constructors. (A discrete_assignment is a subindex over a domain (in Matlab, for example, to access element (1,2) in an array, you would do myArray(1,2); analogously, a discrete_assignment specifies this assignment. I can explain these in more detail if you'l like)). Once the evidence has been propagated across the distributed graph and the results pulled back into the factor graph using factor_graph.hpp::pull_beliefs_for_variables(), the resulting marginal distribution for the variable var can be queried using factor_graph.hpp::belief_for_variable(var), which returns a 1D dense_table. Then, you can find the most likely value in the distribution using dense_table::max_index(), which returns the linear index of the largest value. */ ================================================ FILE: toolkits/graphical_models/factors/fast_discrete_assignment.hpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */
#ifndef FAST_DISCRETE_ASSIGNMENT_HPP
#define FAST_DISCRETE_ASSIGNMENT_HPP

#include "discrete_variable.hpp"
#include "discrete_domain.hpp"
#include "discrete_assignment.hpp"
//#include

namespace graphlab {

  /**
   * A limited version of discrete_assignment which supports
   * a smaller (and different) set of operations
   */
  // NOTE(review): the template parameter list (presumably <size_t MAX_DIM>)
  // appears to have been stripped in this extract; MAX_DIM is used below as
  // the fixed capacity of the per-dimension arrays.
  template class fast_discrete_assignment {
  public:
    // Build the all-zeros assignment over `args`.  Precomputes, for each
    // dimension, the amount to add to the linear index when that dimension
    // is incremented by one (mixed-radix place values, first variable
    // fastest).
    fast_discrete_assignment(const discrete_domain& args) :
        _num_vars(args.num_vars()),
        _index(0) {
      transposed = false;
      for (size_t i = 0;i < _num_vars; ++i) {
        _vars[i] = args.var(i);
        _asgs[i] = 0;
      }
      size_t multiple = 1;
      for (size_t i = 0;i < _num_vars; ++i) {
        _increment_step[i] = multiple;
        multiple *= _vars[i].size();
      }
    }

    //! Construct a fast_discrete_assignment from a discrete_assignment
    // Copies the source assignment's variables and per-dimension values and
    // starts at its linear index.
    fast_discrete_assignment(const discrete_assignment& asg) :
        _num_vars(asg.args().num_vars()),
        _index(asg.linear_index()) {
      transposed = false;
      typename discrete_assignment::const_iterator asg_it = asg.begin();
      for (size_t i = 0; i < _num_vars; ++i) {
        _vars[i] = asg.args().var(i);
        _asgs[i] = asg_it[i];
      }
      size_t multiple = 1;
      for (size_t i = 0;i < _num_vars; ++i) {
        _increment_step[i] = multiple;
        multiple *= _vars[i].size();
      }
    }

    //! get the number of variables
    size_t num_vars() const { return _num_vars; }

    //! get the i'th variable (position within the current, possibly
    //! transposed, ordering)
    discrete_variable var(size_t i) const { return _vars[i]; }

    //! linear index of the current assignment within the original domain
    size_t linear_index() const { return _index; }

    //! Get the next fast_discrete_assignment
    // Mixed-radix increment: bump the first dimension that has room,
    // zeroing (and subtracting out of _index) every dimension that wraps.
    // When every dimension wraps, this becomes the end sentinel
    // (see make_end()).
    fast_discrete_assignment& operator++() {
      // Update the discrete_assignments
      for(size_t i = 0; i < num_vars(); ++i) {
        if (_asgs[i] < (_vars[i].size() - 1)) {
          _asgs[i] = (_asgs[i] + 1);
          _index += _increment_step[i];
          return *this;
        }
        else {
          _index -= _asgs[i] * _increment_step[i];
          _asgs[i] = 0;
        }
      }
      // Reached end
      make_end();
      return *this;
    }

    // Jump directly to a linear index, rebuilding the per-dimension values.
    void set_index(size_t index) {
      _index = index;
      recompute_asgs();
    }

    // Assignment value for the variable with id `var_id` (an id lookup, not
    // a positional index).
    size_t asg(size_t var_id) const {
      size_t idx = var_location(var_id);
      assert(idx < num_vars());
      return _asgs[idx];
    }

    // Set the assignment value for the variable with id `var_id`, keeping
    // the linear index consistent.
    void set_asg(size_t var_id, size_t value) {
      size_t idx = var_location(var_id);
      assert(idx < num_vars());
      assert(value < var(idx).size());
      _asgs[idx] = value;
      recompute_linear_index();
    }

    //! Tests whether two fast_discrete_assignments are equal
    // NOTE(review): equality compares only the linear index, not variable
    // ordering.
    bool operator==(const fast_discrete_assignment& other) const {
      return _index == other._index;
    }

    //! Tests whether two fast_discrete_assignments are not equal
    bool operator!=(const fast_discrete_assignment& other) const {
      return _index != other._index;
    }

    //! Make this an ending fast_discrete_assignment
    // NOTE(review): _index is unsigned, so -1 wraps to the maximum value;
    // that wrapped value is the one-past-the-end sentinel matched by
    // operator==/operator!= above.
    void make_end() { _index = -1; }

    /** Makes the sub_domain the first set of variables to be incremented over
     * Can only be called once */
    void transpose_to_start(const discrete_domain& sub_domain) {
      ASSERT_FALSE(transposed);
      transposed = true;
      // reorder_map[new position] = old position: sub_domain's variables
      // come first (so they iterate fastest), the remainder after them.
      size_t reorder_map[MAX_DIM];
      size_t cursubdomain_idx = 0;
      size_t remainder_idx = sub_domain.num_vars();
      for (size_t i = 0;i < num_vars(); ++i) {
        if (cursubdomain_idx < sub_domain.num_vars()
            && _vars[i].id() == sub_domain.var(cursubdomain_idx).id()) {
          reorder_map[cursubdomain_idx] = i;
          ++cursubdomain_idx;
        }
        else {
          reorder_map[remainder_idx] = i;
          ++remainder_idx;
        }
      }
      //move the asg around
      uint16_t newasgs[MAX_DIM];
      size_t newincrement_step[MAX_DIM];
      discrete_variable newvars[MAX_DIM];
      for (size_t i = 0;i < num_vars() ; ++i) {
        newincrement_step[i] = _increment_step[reorder_map[i]];
        newasgs[i] = _asgs[reorder_map[i]];
        newvars[i] = _vars[reorder_map[i]];
      }
      // copyback
      for (size_t i = 0;i < num_vars(); ++i) {
        _asgs[i] = newasgs[i];
        _vars[i] = newvars[i];
        _increment_step[i] = newincrement_step[i];
      }
    } // end of transpose_to_start

  private:
    //! Recompute the index from the discrete_assignment
    void recompute_linear_index() {
      size_t multiple = 1;
      // Clear the index
      _index = 0;
      for(size_t i = 0; i < num_vars(); ++i) {
        _index += multiple * _asgs[i];
        // assert(_args.var(i).nasgs > 0);
        multiple *= _vars[i].size();
      }
    }

    //! Recompute the discrete_assignments from the index
    void recompute_asgs() {
      size_t quotient = _index;
      for(size_t i = 0; i < num_vars(); ++i) {
        _asgs[i] = quotient % _vars[i].size();
        quotient /= _vars[i].size();
        // assert(_asgs[i] < _args.var(i).size());
      }
    }

    /** get the index of the variable or returns number of variables
     * if the index is not found */
    size_t var_location(size_t var_id) const {
      size_t location = num_vars();
      for(size_t i = 0; i < num_vars() && !(location < num_vars()); ++i) {
        if(_vars[i].id() == var_id) location = i;
      }
      return location;
    }

    size_t _num_vars;                 // number of active dimensions
    // NOTE(review): uint32_t here vs. size_t returned by linear_index();
    // also holds the wrapped -1 end sentinel set by make_end().
    uint32_t _index;
    discrete_variable _vars[MAX_DIM]; // actual ordering of the assignments
    size_t _increment_step[MAX_DIM];  //increment ordering according to _vars
    uint16_t _asgs[MAX_DIM];          // assignments with respect to _vars
    bool transposed;                  // set once by transpose_to_start()
  };

}; // end of namespace graphlab

// Stream as the set of variable ids followed by the linear index,
// e.g. "{v_0, v_1}=3".
template std::ostream&
operator<<(std::ostream& out,
           const graphlab::fast_discrete_assignment& asg) {
  out << "{";
  for(size_t i = 0; i < asg.num_vars(); ++i) {
    out << "v_" << asg.var(i).id();
    if(i < asg.num_vars() - 1) out << ", ";
  }
  out << "}=" << asg.linear_index();
  return out;
}

//#include
#endif // FAST_DISCRETE_ASSIGNMENT_HPP



================================================
FILE: toolkits/graphical_models/factors/sparse_index.hpp
================================================
/**
 * Software submitted by
 * Systems & Technology Research / Vision Systems Inc., 2013
 *
 * Approved for public release; distribution is unlimited. [DISTAR Case #21428]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
See the License for the specific language
 * governing permissions and limitations under the License.
 *
 *
 */
#ifndef SPARSE_INDEX_HPP
#define SPARSE_INDEX_HPP

// NOTE(review): the include targets below appear stripped in this extract.
#include
#include

namespace graphlab {

  // some version of this could be shared with discrete_assignment.
  //
  // A fixed-capacity (MAX_DIM) tuple of per-dimension assignment values,
  // one uint16_t per dimension.  The comparison operators define a total
  // order (element 0 most significant) so it can serve as an ordered map
  // key — presumably the key of sparse_table's assignment-to-value map;
  // confirm against sparse_table.hpp.
  // NOTE(review): the template parameter list (presumably <size_t MAX_DIM>)
  // appears stripped in this extract.
  template class sparse_index {
  public:
    typedef uint16_t* iterator;
    typedef const uint16_t* const_iterator;

  public:
    // Empty index over zero variables.
    sparse_index() : _num_vars(0) { }

    // All-zeros index over num_vars dimensions.
    explicit sparse_index(size_t num_vars) : _num_vars(num_vars) {
      DCHECK_LE(_num_vars, MAX_DIM);
      for(size_t i=0; i<_num_vars; ++i) _asg[i] = 0;
    }

    // Copy the values in [begin, end) into the index.  Values are narrowed
    // from size_t into the uint16_t slots.
    sparse_index(size_t const* const begin, size_t const* const end) {
      _num_vars = end - begin;
      DCHECK_LE(_num_vars, MAX_DIM);
      for(size_t i=0; i<_num_vars; ++i) _asg[i] = begin[i];
    }

    // Copy the values of asg into the index.
    // NOTE(review): the vector's element type appears stripped in this
    // extract.
    explicit sparse_index(std::vector asg) : _num_vars(asg.size()) {
      DCHECK_LE(_num_vars, MAX_DIM);
      for(size_t i=0; i<_num_vars; ++i) _asg[i] = asg[i];
    }

  public:
    // SEE http://stackoverflow.com/questions/4421706/operator-overloading
    //! Tests whether two sparse_index are equal
    // Element-wise comparison; both operands must span the same number of
    // variables (debug-checked).
    bool operator==(const sparse_index& other) const {
      DCHECK_EQ(num_vars(), other.num_vars());
      for(size_t i=0; i<_num_vars; ++i)
        if(_asg[i] != other._asg[i]) return false;
      return true;
    }

    //! Tests whether this sparse_index is < other
    // Lexicographic order with element 0 the most significant position.
    bool operator<(const sparse_index& other) const {
      DCHECK_EQ(num_vars(), other.num_vars());
      for(size_t i=0; i<_num_vars; ++i) {
        if(_asg[i] > other._asg[i]) return false;
        else if(_asg[i] < other._asg[i]) return true;
      }
      return false;
    }

    //! Tests whether two sparse_indexs are not equal
    bool operator!=(const sparse_index& other) const {
      return !this->operator==(other);
    }

    //! Tests whether this sparse_index is > other
    bool operator>(const sparse_index& other) const {
      return other.operator<(*this);
    }

    //! Tests whether this sparse_index is <= other
    bool operator<=(const sparse_index& other) const {
      return !this->operator>(other);
    }

    //! Tests whether this sparse_index is >= other
    bool operator>=(const sparse_index& other) const {
      return !this->operator<(other);
    }

    // Raw iteration over the per-dimension values.
    iterator begin() { return &_asg[0]; }
    iterator end() { return &_asg[_num_vars]; }
    const_iterator begin() const { return &_asg[0]; }
    const_iterator end() const { return &_asg[_num_vars]; }

    // TODO i dont like these methods being public, but there is little i
    // can do about it since sparse_index doesnt know about the domain
    //! Return the assignment at the specified index
    // NOTE this is not the assignment for a given variable id;
    // be mindful of variable reordering
    inline size_t asg_at(const size_t index) const {
      DCHECK_LT(index, _num_vars);
      return _asg[index];
    }

    //! Set the assignment at the specified index
    // NOTE this is the assignment for a given index, not for a given variable id;
    // be mindful of variable reordering
    inline void set_asg_at(const size_t index, const size_t value) {
      DCHECK_LT(index, _num_vars);
      _asg[index] = value;
    }

    inline size_t num_vars() const { return _num_vars; }

    // Deserialize: reads the dimension count, then each value in order.
    void load(graphlab::iarchive& arc) {
      arc >> _num_vars;
      ASSERT_LE(_num_vars, MAX_DIM);
      for(size_t i = 0; i < _num_vars; ++i) arc >> _asg[i];
    }

    // Serialize: writes the dimension count, then each value in order.
    void save(graphlab::oarchive& arc) const {
      arc << _num_vars;
      for(size_t i = 0; i < _num_vars; ++i) arc << _asg[i];
    }

    // Stream as comma-separated values, e.g. "0, 2, 1".
    friend std::ostream& operator<<(std::ostream& out, const sparse_index& sa) {
      for(size_t i=0; i < sa._num_vars; ++i) {
        out << sa._asg[i];
        if(i < sa._num_vars - 1) out << ", ";
      }
      return out;
    }

  private:
    size_t _num_vars;        // number of active dimensions
    uint16_t _asg[MAX_DIM];  // one assignment value per dimension
  };

} // end of namespace graphlab

#endif // SPARSE_INDEX_HPP



================================================
FILE: toolkits/graphical_models/factors/sparse_table.hpp
================================================
/**
 * Software submitted by
 * Systems & Technology Research / Vision Systems Inc., 2013
 *
 * Approved for public release; distribution is unlimited.
[DISTAR Case #21428] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef SPARSE_TABLE_HPP #define SPARSE_TABLE_HPP #include #include #include #include #include #include #include #include #include #include #include "table_base.hpp" #include "dense_table.hpp" #include "sparse_index.hpp" namespace graphlab { /** * An n-D sparse table up to max_dim dimensions. * SEE dense_table.hpp for more detail * * \author Scott Richardson 10/2012 */ template class sparse_table : public table_base { private: typedef sparse_table const *const const_ptr; typedef table_base table_base_t; typedef discrete_variable variable_t; typedef discrete_domain domain_t; typedef discrete_assignment assignment_t; typedef dense_table dense_table_t; public: typedef sparse_index sparse_index_t; typedef std::vector > compact_data_t; private: typedef std::map sparse_data_t; typedef std::vector > compact_view_t; typedef std::vector > compact_const_view_t; public: /** Construct an empty table */ sparse_table() { } /** Construct a table over the given domain */ sparse_table(const domain_t& dom) { set_domain(dom); } /** Construct a table over the given domain * dom : the domain over which the table is defined * data : a vector of assignment-value pairs. 
the assignment must * be sorted according to dom; that is, such that the * variable with the smallest id iterates fastest */ sparse_table(const domain_t& dom, const sparse_data_t& data) { set_domain(dom); _dataAtAsg = data; } /** Construct a table over the given domain * vars : a vector of variables that compose the domain * data : a vector of values serialized such that the first * variable in vars iterates the fastest * NOTE this is a convenience constructor. the entries in the * vector are re-sorted such that the variable with the smallest * id iterates fastest */ sparse_table(const std::vector& vars, const std::vector >& data) { // Construct the arguments (which will remap the domain) set_domain(domain_t(vars)); // create a faux domain with the size of the dimensions ordered correctly. this // is essentially a permute operation. domain_t dom; for(size_t i=0; i asgs(asg.begin(), asg.end()); assignment_t fast_asg(vars, asgs); set_logP(fast_asg, data[i].second); } } /** Construct an empty table over the given variable */ sparse_table(const variable_t& args) { // Construct the arguments (which will remap the domain) set_domain(domain_t(args)); } /** Construct an empty table over the given domain */ sparse_table(const std::vector& args) { // Construct the arguments (which will remap the domain) set_domain(domain_t(args)); } // NOTE currently, implementing the (big) three isnt strictly necessary /** Construct a copy */ sparse_table(const sparse_table& other) : _args(other._args), _dataAtAsg(other._dataAtAsg) { } /** Destructor */ virtual ~sparse_table() { } // REVIEW currently, this isnt necessary /** Standard assignment operator */ sparse_table& operator=(const sparse_table& other) { if(this == &other) return *this; _args = other._args; //_dataAtAsg.insert(other._dataAtAsg.begin(), other._dataAtAsg.end()); _dataAtAsg = other._dataAtAsg; return *this; } public: using table_base_t::APPROX_LOG_ZERO; // if the data structures between the two tables is equivilent, this 
is faster sparse_table& copy_onto(const sparse_table& other) { if(this == &other) return *this; // ensure the domains are the same DCHECK_EQ(args(), other.args()); // ensure the number of non-zero entries are the same (sanity check) DCHECK_EQ(_dataAtAsg.size(), other._dataAtAsg.size()); typename sparse_data_t::iterator entry = _dataAtAsg.begin(); typename sparse_data_t::const_iterator end = _dataAtAsg.end(); typename sparse_data_t::const_iterator oentry = other._dataAtAsg.begin(); for( ; entry != end; ++entry, ++oentry) { // ensure the two assignments are equivilent (std::map should sort them similarly) DCHECK_EQ(entry->first, oentry->first); __set_logP(entry->second, oentry->second); } // slower //_dataAtAsg.insert(other._dataAtAsg.begin(), other._dataAtAsg.end()); return *this; } /** * Reset the domain for the table. A domain is defined by a vector of * variables, and an assignment is defined over that domain. */ void set_domain(const domain_t& args) { _args = args; _dataAtAsg.clear(); } const domain_t& domain() const { return args(); } bool operator==(const sparse_table& other) { // are the two domains equal if(args() != other.args()) return false; // are there the same number of non-zero elements in the two tables if(_dataAtAsg.size() != other._dataAtAsg.size()) return false; typename sparse_data_t::iterator entry = _dataAtAsg.begin(); typename sparse_data_t::const_iterator end = _dataAtAsg.end(); typename sparse_data_t::const_iterator oentry = other._dataAtAsg.begin(); for( ; entry != end; ++entry, ++oentry) { // is the assignment the same (std::map should sort them similarly) if(entry->first != oentry->first) return false; // is the value the same if(entry->second != oentry->second) return false; } return true; } bool operator!=(const sparse_table& other) { return !this->operator==(other); } /** * Return the variable at the given index within the domain. 
(var(i) * specifies the dimension associated with the i'th element of an * assignment, i.e., sparse_index::_asg[i]) */ virtual const variable_t& var(const size_t index) const { return args().var(index); } /** * Return the index for a given variable within the domain (as well as * into sparse_index::_asg[]). */ size_t var_location(const variable_t& var) { return args().var_location(var); } /** Return the number of dimensions in the domain */ virtual size_t ndims() const { return args().num_vars(); } /** Return the number of elements in the domain: prod(size(table)) */ virtual size_t numel() const { return args().size(); } /** Return the number of non-zero elements in the table */ size_t nnz() const { return _dataAtAsg.size(); } /** Zero existing entries in the table */ virtual void zero() { typename sparse_data_t::iterator entry = _dataAtAsg.begin(); typename sparse_data_t::const_iterator end = _dataAtAsg.end(); for( ; entry != end; ++entry) { entry->second = 0.0; } } private: inline void remove_logP(const sparse_index_t& asg) { // the assignment must be within the domain DASSERT_TRUE(validate_asg(asg)); if(_dataAtAsg.count(asg) == 0) return; _dataAtAsg.erase(asg); } // REVIEW i dont love this method, but it does afford me some bounds checking. // set_logP(sparse_index_t&, double), seems cleaner, but it has to // re-lookup the pointer. very slow. 
inline void __set_logP(double& tbl_ref, const double& val) { tbl_ref = std::max(val, APPROX_LOG_ZERO()); } inline void set_logP(const size_t linear_index, const double& val) { set_logP(compute_asg(linear_index), val); } inline void set_logP(const std::vector& asg, const double& val) { set_logP(sparse_index_t(asg), val); } inline void set_logP(const sparse_index_t& asg, const double& val) { // the assignment must be within the domain DASSERT_TRUE(validate_asg(asg)); _dataAtAsg[asg] = std::max(val, APPROX_LOG_ZERO()); } inline double logP(const sparse_index_t& asg) const { DASSERT_TRUE(validate_asg(asg)); // O(log(n)) typename sparse_data_t::const_iterator val = _dataAtAsg.find(asg); return val == _dataAtAsg.end() ? APPROX_LOG_ZERO() : val->second; } public: /** * Add an entry to the sparse table (indexed by its sparse_index). * Clip values to be greater than or equal to APPROX_LOG_ZERO. */ // NOTE the assignment is not removed from the domain if val is APPROX_LOG_ZERO. // in the future, if these values are removed, it could invalidate any iterator // over the list of sparse assignments. inline void set_logP(const assignment_t& asg, const double& val) { DCHECK_EQ(asg.args(), args()); set_logP(as_sparse_index(asg), val); } /** Remove an entry from the sparse table and its corresponding sparse_index) */ inline void remove_logP(const assignment_t& asg) { DCHECK_EQ(asg.args(), args()); remove_logP(as_sparse_index(asg)); } // NOTE index is serialized according to the linear indexing of the domain // TODO can i make this private? inline double logP(const size_t linear_index) const { return logP(compute_asg(linear_index)); } /** Return an entry from the sparse table (indexed by its sparse_index) */ inline double logP(const assignment_t& asg) const { DCHECK_EQ(asg.args(), args()); return logP(as_sparse_index(asg)); } //! 
this(x) /= other(x); // supports broadcasting of a sub-domain across the full domain sparse_table& operator/=(const dense_table_t& other) { return for_each_assignment(other, divides()); return *this; } //! this(x) *= other(x); // supports broadcasting of a sub-domain across the full domain sparse_table& operator*=(const dense_table_t& other) { return for_each_assignment(other, multiplies()); } //! this(x) /= other(x); // supports broadcasting of a sub-domain across the full domain sparse_table& operator/=(const sparse_table& other) { return for_each_assignment(other, divides()); } //! this(x) *= other(x); // supports broadcasting of a sub-domain across the full domain sparse_table& operator*=(const sparse_table& other) { return for_each_assignment(other, multiplies()); } private: struct divides { inline double operator()(const double& a, const double& b) const { return a - b; } }; struct multiplies { inline double operator()(const double& a, const double& b) const { return a + b; } }; template inline sparse_table& for_each_assignment(const dense_table_t& other, const Func& f) { // other domain must be a subset of this domain DCHECK_EQ((args() + other.args()).num_vars(), args().num_vars()); assignment_t dense_asg(args()); // only need to operate on the the assignments in the sparse table // (equivalently, the intersection of the sparse and dense assignments) typename sparse_data_t::iterator it = _dataAtAsg.begin(); typename sparse_data_t::const_iterator end = _dataAtAsg.end(); for( ; it != end; ++it) { dense_asg.set_index(linear_index(it->first)); //double val = it->second + other.logP(dense_asg)); //it->second = val; double val = f(it->second, other.logP(dense_asg)); __set_logP(it->second, val); } return *this; } template sparse_table& for_each_assignment(const sparse_table& other, const Func& f) { // if the tables span the same domain if(args() == other.args()) { DCHECK_EQ(numel(), other.numel()); // NOTE the assignments NOT in the intersection of the two sparse 
tables // will be APPROX_LOG_ZERO() and are removed in *this intersect(other); typename sparse_data_t::iterator it = _dataAtAsg.begin(); typename sparse_data_t::const_iterator end = _dataAtAsg.end(); typename sparse_data_t::const_iterator other_it = other._dataAtAsg.begin(); for( ; it != end; ++it, ++other_it) { //double val = it->second + other_it->second); //it->second = val; double val = f(it->second, other_it->second); __set_logP(it->second, val); } } // else, broadcast the sub-domain across the full domain else { // other domain must be a subset of this domain DCHECK_EQ((args() + other.args()).num_vars(), args().num_vars()); compact_view_t compact_view = as_vector_view(); compact_const_view_t other_compact_view = other.as_vector_view(); // define the one-to-one mapping from other's domain to our's std::vector sorting_inds = args().vars_location(other.args()); // reorder the assignments so they can be quickly iterated over permute(sorting_inds, compact_view); other.permute(other_compact_view); // Loop over x // NOTE the assignments are sorted the same. ie. our assignments share the same // ordering over the sub-domain spaned by msg as the assignments in msg. 
typename compact_const_view_t::const_iterator x_fastasg = other_compact_view.begin(); typename compact_const_view_t::const_iterator x_end = other_compact_view.end(); typename compact_view_t::iterator y_fastasg = compact_view.begin(); typename compact_view_t::const_iterator y_end = compact_view.end(); sparse_index_t yasg; for( ; x_fastasg < x_end; ++x_fastasg) { while(y_fastasg != y_end) { yasg = restrict(*(y_fastasg->first), other.args()); if(*(x_fastasg->first) > yasg) { ++y_fastasg; continue; } else if(*(x_fastasg->first) < yasg) { ++x_fastasg; break; } // else the sub-assignments are equal else { //double val = *(y_fastasg->second) + *(x_fastasg->second); //*(y_fastasg->second) = val; double val = f(*(y_fastasg->second), *(x_fastasg->second)); __set_logP(*(y_fastasg->second), val); ++y_fastasg; } } } } return *this; } public: using table_base_t::marginalize; // since the message is always a unary distribution, this is basically // >> sum( // reshape( // permute(cavity, circshift(1:ndims(cavity), [0, -msg.dim])), // [], msg.numel), // [], 1) // or more generally, // >> sum( // reshape( // permute(cavity, [setdiff(1:ndims(cavity), msg.dims), msg.dims]), // [], msg.numel), // [], 1) void marginalize(dense_table_t& msg) const { // No need to marginalize if(args() == msg.args()) { // Just copy and return as_dense_table(msg); return; } // the domains cannot be disjoint DCHECK_GT((args() - msg.args()).num_vars(), 0); compact_const_view_t fast_view = as_vector_view(); // define the one-to-one mapping from the msg's domain to our's std::vector sorting_inds = args().vars_location(msg.args()); // reorder the assignments so they can be quickly iterated over permute(sorting_inds, fast_view); assignment_t yasg(args()); // Loop over x // NOTE our assignments have been reordered so we can index assignments in // the two domains consecutively. 
e.g., if the domain of msg, {v1,v2}, is // sorted in ascending order, then our assignments must also be sorted in // assending order over {v1,v2} (although these sub-domains need not be // be sorted the same.) typename compact_const_view_t::const_iterator fastyasg = fast_view.begin(); typename compact_const_view_t::const_iterator yend = fast_view.end(); typename domain_t::const_iterator xasg = msg.args().begin(); typename domain_t::const_iterator xend = msg.args().end(); for( ; xasg != xend; ++xasg) { double sum = 0; // loop over y while(fastyasg != yend) { yasg.set_index(linear_index(*(fastyasg->first))); if(*xasg != yasg.restrict(xasg->args())) break; //maxval = std::sum(maxval, _dataAtAsg[*fastyasg]); sum += exp(*(fastyasg->second)); ++fastyasg; } DASSERT_FALSE( std::isinf(sum) ); DASSERT_FALSE( std::isnan(sum) ); DCHECK_GE(sum, 0.0); if(sum == 0) msg.set_logP( *xasg, APPROX_LOG_ZERO() ); else msg.set_logP( *xasg, log(sum) ); } } using table_base_t::MAP; // since the message is always a unary distribution, this is basically // >> max( // reshape( // permute(cavity, circshift(1:ndims(cavity), [0, -msg.dim])), // [], msg.numel), // [], 1) // or more generally, // >> max( // reshape( // permute(cavity, [setdiff(1:ndims(cavity), msg.dims), msg.dims]), // [], msg.numel), // [], 1) void MAP(dense_table_t& msg) const { // No need to marginalize if(args() == msg.args()) { // Just copy and return as_dense_table(msg); return; } // the domains cannot be disjoint DCHECK_GT((args() - msg.args()).num_vars(), 0); compact_const_view_t fast_view = as_vector_view(); // define the one-to-one mapping from the msg's domain to our's std::vector sorting_inds = args().vars_location(msg.args()); // reorder the assignments so they can be quickly iterated over permute(sorting_inds, fast_view); assignment_t yasg(args()); // Loop over x // NOTE our assignments have been reordered so we can index assignments in // the two domains consecutively. 
e.g., if the domain of msg, {v1,v2}, is // sorted in ascending order, then our assignments must also be sorted in // assending order over {v1,v2} (although these sub-domains need not be // be sorted the same.) typename compact_const_view_t::const_iterator fastyasg = fast_view.begin(); typename compact_const_view_t::const_iterator yend = fast_view.end(); typename domain_t::const_iterator xasg = msg.args().begin(); typename domain_t::const_iterator xend = msg.args().end(); for( ; xasg != xend; ++xasg) { double maxval = APPROX_LOG_ZERO(); // loop over y while(fastyasg != yend) { yasg.set_index(linear_index(*(fastyasg->first))); if(*xasg != yasg.restrict(xasg->args())) break; //maxval = std::max(maxval, _dataAtAsg[*fastyasg]); maxval = std::max(maxval, *(fastyasg->second)); ++fastyasg; } msg.set_logP( *xasg, maxval ); } } void intersect(const sparse_table& other) { map_left_intersection(_dataAtAsg, other._dataAtAsg); } private: //! Compute the index from the sparse_index // NOTE index is serialized according to the linear indexing of the domain size_t linear_index(const sparse_index_t& asg) const { size_t multiple = 1; // Clear the index size_t index = 0; for(size_t i = 0; i < args().num_vars(); ++i) { index += multiple * asg.asg_at(i); // assert(args().var(i).nasgs > 0); multiple *= args().var(i).size(); } return index; } /** Ensure that an asg falls within the domain */ bool validate_asg(const sparse_index_t& asg) const { // no index can be larger than the number of labels in that dimension //return asg <= end_asg(); for(size_t i=0; i= args().var(i).size()) return false; return true; } //! 
Compute the sparse_index from the index // NOTE index is serialized according to the linear indexing of the domain sparse_index_t compute_asg(const size_t index) const { DCHECK_LT(index, args().size()); sparse_index_t asg(args().num_vars()); size_t quotient = index; for(size_t i = 0; i < args().num_vars(); ++i) { asg.set_asg_at(i, quotient % args().var(i).size()); quotient /= args().var(i).size(); // assert(asg.asg_at(i) < args().var(i).size()); } return asg; } public: //! Compute the largest assignment possible assignment_t end_asg() { sparse_index_t asg; for(size_t i=0; i keyset() const { std::vector keys; typename sparse_data_t::const_iterator it = _dataAtAsg.begin(); typename sparse_data_t::const_iterator end = _dataAtAsg.end(); for( ; it != end; ++it) { keys.push_back(it->first); } return keys; } //: virtual methods public: virtual sparse_table& deep_copy(const table_base_t& base) { if(this == &base) return *this; // ensure we are dealing with a sparse_table const_ptr other = dynamic_cast(&base); if(other == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } *this = *other; return *this; } virtual sparse_table& copy_onto(const table_base_t& base) { if(this == &base) return *this; // ensure we are dealing with a sparse_table const_ptr other = dynamic_cast(&base); if(other == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } this->copy_onto(*other); return *this; } /* // NOTE this operation would turn a sparse table into a dense table //! this(x) += other(x); virtual sparse_table& plus_equals(const table_base_t& base) { // ensure we are dealing with a sparse_table const_ptr other = dynamic_cast(&base); if(other == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } // TODO implement operator *this += *other; return *this; } */ //! 
this(x) *= other(x); virtual sparse_table& times_equals(const table_base_t& base) { // ensure we are dealing with a sparse_table { sparse_table const* const other = dynamic_cast(&base); if( NULL != other) { *this *= *other; return *this; } } { dense_table_t const* const other = dynamic_cast(&base); if( NULL != other ) { *this *= *other; return *this; } } std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } //! this(x) /= other(x); virtual sparse_table& divide_equals(const table_base_t& base) { // ensure we are dealing with a sparse_table { sparse_table const* const other = dynamic_cast(&base); if( NULL != other) { *this /= *other; return *this; } } { dense_table_t const* const other = dynamic_cast(&base); if( NULL != other ) { *this /= *other; return *this; } } std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } //! (out(x) = this(x)) * other(x); virtual void times(const table_base_t& base, table_base_t& out_base) const { // ensure we are dealing with a sparse_table sparse_table *const out = dynamic_cast(&out_base); if(out == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } *out = *this; // deep copy out->times_equals(base); } //! 
(out(x) = this(x)) / other(x); virtual void divide(const table_base_t& base, table_base_t& out_base) const { // ensure we are dealing with a sparse_table sparse_table *const out = dynamic_cast(&out_base); if(out == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } *out = *this; // deep copy out->divide_equals(base); } virtual void marginalize(table_base_t& base) const { // ensure we are dealing with a dense_table dense_table_t* msg = dynamic_cast(&base); if(msg == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } marginalize(*msg); } virtual void MAP(table_base_t& base) const { // ensure we are dealing with a dense_table dense_table_t* msg = dynamic_cast(&base); if(msg == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } MAP(*msg); } virtual std::ostream& print(std::ostream& out = std::cout) const { // ensure we are dealing with a sparse_table const_ptr tbl = dynamic_cast(this); if(tbl == NULL) { std::cout << "ERROR: std::bad_cast" << std::endl; // REVIEW should probably raise an exception ASSERT_TRUE(false); } out << *tbl; return out; } friend std::ostream& operator<<(std::ostream& out, const sparse_table& tbl) { out << "Sparse Table: " << tbl.args() << "{" << std::endl; typename sparse_data_t::const_iterator val = tbl._dataAtAsg.begin(); typename sparse_data_t::const_iterator end = tbl._dataAtAsg.end(); for( ; val != end; ++val) { out << "\tLogP({" << val->first << "}=" << tbl.linear_index(val->first) << ")=" << val->second << std::endl; } out << "}"; return out; } virtual void load(graphlab::iarchive& arc) { arc >> _args; arc >> _dataAtAsg; // uses graphlab serialization operator } virtual void save(graphlab::oarchive& arc) const { arc << _args; arc << _dataAtAsg; // uses graphlab serialization operator } private: //! 
Tests whether one sparse_index's i'th index is < another's struct less_than_by_index { less_than_by_index(size_t ind) : _sorting_ind(ind) { } inline bool operator()( const std::pair& a, const std::pair& b) const { return less_than(a.first, b.first); } inline bool operator()( const std::pair& a, const std::pair& b) const { return less_than(*(a.first), *(b.first)); } inline bool operator()( const std::pair& a, const std::pair& b) const { return less_than(*(a.first), *(b.first)); } private: inline bool less_than(const sparse_index_t& a, const sparse_index_t& b) const { return a.asg_at(_sorting_ind) < b.asg_at(_sorting_ind); } size_t _sorting_ind; }; template void permute(T& data) const { std::vector sorting_inds; for(size_t i=0; i void permute( const size_t sorting_ind, T& data ) const { std::vector sorting_inds; sorting_inds.push_back(sorting_ind); permute(sorting_inds, data); } // REVIEW i might be able to copy _dataAtAsg and sort it with a new predicate template void permute( const std::vector& sorting_inds, T& data ) const { std::vector::const_reverse_iterator s = sorting_inds.rbegin(); std::vector::const_reverse_iterator rend = sorting_inds.rend(); for( ; s != rend; ++s) { DCHECK_LT(*s, args().num_vars()); std::stable_sort(data.begin(), data.end(), less_than_by_index(*s)); } } //! 
Restrict the sparse_index to a sparse_index over the subdomain sparse_index_t restrict(const sparse_index_t& asg, const domain_t& sub_domain) const { sparse_index_t other_asg(sub_domain.num_vars()); size_t index = 0; // Map the variables for(size_t i = 0; i < args().num_vars() && index < sub_domain.num_vars(); ++i) { if(sub_domain.var(index) == args().var(i)) { other_asg.set_asg_at(index, asg.asg_at(i)); index++; } } DCHECK_EQ(index, sub_domain.num_vars()); return other_asg; } // end of restrict // O(n) // from http://stackoverflow.com/questions/3772664/intersection-of-two-stl-maps // and http://stackoverflow.com/questions/1773526/in-place-c-set-intersection // REVIEW is there any problem with invalidated iterators? template void map_left_intersection( std::map& left, const std::map& right) const { typename std::map::iterator il = left.begin(); typename std::map::iterator l_end = left.end(); typename std::map::const_iterator ir = right.begin(); typename std::map::const_iterator r_end = right.end(); while (il != l_end && ir != r_end) { if (il->first < ir->first) { left.erase(il); ++il; } else if (ir->first < il->first) { ++ir; } else { ++il; ++ir; } } left.erase(il, l_end); } compact_view_t as_vector_view() { compact_view_t compact_view; compact_view.resize(_dataAtAsg.size()); typename sparse_data_t::iterator it = _dataAtAsg.begin(); for(size_t i = 0; i < _dataAtAsg.size(); ++i, ++it) { compact_view.at(i) = std::make_pair(&(it->first), &(it->second)); } //std::copy(_dataAtAsg.begin(), _dataAtAsg.end(), compact_view.begin()); return compact_view; } compact_const_view_t as_vector_view() const { compact_const_view_t compact_view; compact_view.resize(_dataAtAsg.size()); typename sparse_data_t::const_iterator it = _dataAtAsg.begin(); for(size_t i = 0; i < _dataAtAsg.size(); ++i, ++it) { compact_view.at(i) = std::make_pair(&(it->first), &(it->second)); } //std::copy(_dataAtAsg.begin(), _dataAtAsg.end(), compact_view.begin()); return compact_view; } private: inline const 
domain_t& args() const { return _args; } inline assignment_t as_assignment(const sparse_index_t &asg) const { std::vector asgs(asg.begin(), asg.end()); return assignment_t(args(), asgs); } inline sparse_index_t as_sparse_index(const assignment_t &asg) const { std::vector asgs(asg.begin(), asg.end()); return sparse_index_t(asgs); } private: //! The indicies in an assignment are mapped (one-to-one and in-order) to the // variables in a domain. domain_t _args; //! Map between the sparse assignment in the domain and its value. Sorted by // assignment. sparse_data_t _dataAtAsg; }; } // end of namespace graphlab #endif // SPARSE_TABLE_HPP ================================================ FILE: toolkits/graphical_models/factors/table_base.hpp ================================================ /** * Software submitted by * Systems & Technology Research / Vision Systems Inc., 2013 * * Approved for public release; distribution is unlimited. [DISTAR Case #21428] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef TABLE_BASE_HPP #define TABLE_BASE_HPP /** * This file defines the root of the table hierarchy for * the various table types. 
* * \author Scott Richardson 10/2012 */ // INCLUDES ===================================================================> // Including Standard Libraries #include #include #include #include "discrete_variable.hpp" namespace graphlab { template class table_base { public: typedef table_base const *const const_ptr; virtual ~table_base() { } static inline double APPROX_LOG_ZERO() { //return -std::numeric_limits::max(); return -1e6; } virtual table_base& deep_copy(const table_base& base) = 0; virtual table_base& copy_onto(const table_base& base) = 0; //virtual table_base& plus_equals(const table_base& base) = 0; virtual table_base& times_equals(const table_base& base) = 0; virtual table_base& divide_equals(const table_base& base) = 0; //virtual void plus(const table_base& base, table_base& out) const = 0; virtual void times(const table_base& base, table_base& out) const = 0; virtual void divide(const table_base& base, table_base& out) const = 0; virtual const discrete_variable& var(size_t index) const = 0; // REVIEW these are always dense_tables. should they be in here? virtual void MAP(table_base& msg) const = 0; virtual void marginalize(table_base& msg) const = 0; virtual void zero() = 0; virtual size_t numel() const = 0; // REVIEW might not be necessary virtual size_t ndims() const = 0; virtual void load(graphlab::iarchive& arc) = 0; virtual void save(graphlab::oarchive& arc) const = 0; friend std::ostream& operator<<(std::ostream& out, const table_base& factor) { factor.print(out); return out; } private: virtual std::ostream& print(std::ostream& out = std::cout) const = 0; }; // end of table_base } // end of namespace graphlab #endif // TABLE_BASE_HPP ================================================ FILE: toolkits/graphical_models/factors/table_factor.hpp ================================================ /** * Software submitted by * Systems & Technology Research / Vision Systems Inc., 2013 * * Approved for public release; distribution is unlimited. 
[DISTAR Case #21428] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ #ifndef TABLE_FACTOR_HPP #define TABLE_FACTOR_HPP /** * This file defines an opaque interface (or facade) for the various table types. * * \author Scott Richardson 10/2012 */ // INCLUDES ===================================================================> // Including Standard Libraries #include #include #include #include #include #include #include #include #include #include "dense_table.hpp" #include "sparse_table.hpp" namespace graphlab { /** * An abstraction implemented to manage the creation and deletion * of the sparse/dense tables; so that RAII */ // NOTE im not 100% this class is necessary, except to provide // convinient access to certian operators like operator*(). if i // made table_base concrete, i might be able to use it wherever // i currently use this class. (it's not like this class is // providing conversions between dense and sparse tables or anything.) 
template class table_factor { typedef dense_table dense_table_t; typedef sparse_table sparse_table_t; typedef table_base table_base_t; public: //static const size_t MAX_DIM_ = MAX_DIM; // REVIEW accessing this requires constructing a table_factor enum table_storage_t { nil, DENSE_TABLE, SPARSE_TABLE }; table_factor() : _table_storage(table_factor::nil), _table(NULL) { } table_factor(table_storage_t storage) : _table_storage(storage), _table(NULL) { alloc(); } table_factor(const table_factor& other) : _table_storage(other._table_storage), _table(NULL) { if(other._table == NULL) { // integrity check DCHECK_EQ(other._table_storage, table_factor::nil); } else { alloc(); _table->deep_copy(*(other._table)); } } table_factor(table_storage_t storage, typename table_base_t::const_ptr base) : _table_storage(storage), _table(NULL) { DCHECK_NE(base, NULL); DCHECK_NE(_table_storage, table_factor::nil); alloc(); _table->deep_copy(*base); } table_factor(table_storage_t storage, const table_base_t& base) : _table_storage(storage), _table(NULL) { DCHECK_NE(_table_storage, table_factor::nil); alloc(); _table->deep_copy(base); } table_factor(typename table_base_t::const_ptr base) : _table_storage(table_factor::nil), _table(NULL) { DCHECK_NE(base, NULL); determine_storage_t(*base); alloc(); _table->deep_copy(*base); } table_factor(const table_base_t& base) : _table_storage(table_factor::nil), _table(NULL) { determine_storage_t(base); alloc(); _table->deep_copy(base); } ~table_factor() { delete _table; } // TODO provide iterators public: table_factor& operator=(const table_factor& other) { if(this == &other) return *this; table_base_t* base = NULL; if(other._table == NULL) { // integrity check DCHECK_EQ(other._table_storage, table_factor::nil); } else { try { alloc_table(other._table_storage, &base); base->deep_copy(*(other._table)); } catch ( ... 
) { delete base; throw; } } _table_storage = other._table_storage; delete _table; _table = base; return *this; } /* table_factor& operator+=(const table_factor& other) { DCHECK_NE(_table, NULL); DCHECK_NE(other._table, NULL); _table->plus_equals(*(other._table)); return *this; } */ table_factor& operator*=(const table_factor& other) { DCHECK_NE(_table, NULL); DCHECK_NE(other._table, NULL); _table->times_equals(*(other._table)); return *this; } table_factor& operator/=(const table_factor& other) { DCHECK_NE(_table, NULL); DCHECK_NE(other._table, NULL); _table->divide_equals(*(other._table)); return *this; } table_factor operator*(const table_factor& other) const { DCHECK_NE(_table, NULL); DCHECK_NE(other._table, NULL); // deep copy table_factor out(_table_storage); _table->times(*(other._table), *(out._table)); return out; } table_factor operator/(const table_factor& other) const { DCHECK_NE(_table, NULL); DCHECK_NE(other._table, NULL); // deep copy table_factor out(_table_storage); _table->divide(*(other._table), *(out._table)); return out; } table_base_t const * table() const { DCHECK_NE(_table, NULL); return _table; } table_base_t* table() { DCHECK_NE(_table, NULL); return _table; } void load(graphlab::iarchive& arc) { arc >> _table_storage; alloc(); if(_table_storage != table_factor::nil) arc >> *_table; } void save(graphlab::oarchive& arc) const { arc << _table_storage; if(_table_storage != table_factor::nil) arc << *_table; } private: // NOTE take ownership of base table_factor(table_storage_t storage, table_base_t *const *const base) : _table_storage(storage) { if(base != NULL) _table = *base; DCHECK_NE(_table, NULL); } void determine_storage_t(const table_base_t& base) { if( typeid(base) == typeid(dense_table_t) ) _table_storage = table_factor::DENSE_TABLE; else if( typeid(base) == typeid(sparse_table_t) ) _table_storage = table_factor::SPARSE_TABLE; else { _table_storage = table_factor::nil; // REVIEW should probably raise an exception std::cout << "ERROR: 
unknown table storage type. " << std::endl; ASSERT_TRUE(false); } } void alloc() { alloc_table(_table_storage, &(this->_table)); } // REVIEW could probably use virtual constructors to avoid some of this void alloc_table(table_storage_t storage, table_base_t** base) { // try to avoid memory leaks DCHECK_EQ(*base, NULL); switch(storage) { case table_factor::DENSE_TABLE: *base = new dense_table_t(); break; case table_factor::SPARSE_TABLE: *base = new sparse_table_t(); break; case table_factor::nil: default: // i allow *this to be constructed with a table_factor::nil table storage // type, so this path is possible break; } } public: table_storage_t table_storage() const { return _table_storage; } friend std::ostream& operator<<(std::ostream& out, const table_factor& factor) { out << "Table Factor(" << factor._table_storage << "): " << *(factor._table); return out; } private: table_storage_t _table_storage; table_base_t* _table; }; // end of table_factor } // end of namespace graphlab #endif // TABLE_FACTOR_HPP ================================================ FILE: toolkits/graphical_models/factors/tests/CMakeLists.txt ================================================ project(GraphLab) # link_libraries(${Boost_LIBRARIES}) # link_libraries(${GraphLab_LIBRARIES}) # set include path. doesn't seem like the best way to set this... include_directories( ${GraphLab_SOURCE_DIR}/../..) macro(add_all_subdirectories retval curdir) file(GLOB sub-dir RELATIVE ${curdir} *) set(list_of_dirs "") foreach(dir ${sub-dir}) if(IS_DIRECTORY ${curdir}/${dir}) STRING(SUBSTRING ${dir} 0 1 firstchar) if(${firstchar} STREQUAL "." OR ${firstchar} STREQUAL "_" ) else(${firstchar} STREQUAL "." 
OR ${firstchar} STREQUAL "_") set(list_of_dirs ${list_of_dirs} ${dir}) message(STATUS "Detected App: " ${dir}) add_subdirectory(${dir}) endif() endif() endforeach() set(${retval} ${list_of_dirs}) endmacro() add_all_subdirectories(retval, ${CMAKE_CURRENT_SOURCE_DIR}) ================================================ FILE: toolkits/graphical_models/factors/tests/denoise/CMakeLists.txt ================================================ project(GraphLab) add_graphlab_executable(denoise denoise.cpp) requires_opencv(denoise) file(COPY noisy_img.png DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) file(COPY denoised_gm.png DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) ================================================ FILE: toolkits/graphical_models/factors/tests/denoise/denoise.cpp ================================================ /** * Software submitted by * Systems & Technology Research / Vision Systems Inc., 2013 * * Approved for public release; distribution is unlimited. [DISTAR Case #21428] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * This file contains an example of graphlab used for discrete loopy * belief propagation in a factor graph to denoise a synthetic noisy image. 
 *
 *  ./denoise --damping=.3 --ncpus=4
 *
 * \author Scott Richardson
 *   based on toolkits/graphical_models/deprecated/loopybp_denoise.cpp
 */

// INCLUDES ===================================================================>
// Including Standard Libraries
// NOTE(review): every `#include <...>` target in this file was lost in
// extraction (only the bare directives remain), and several statements below
// are missing the text that sat between angle brackets.  The code is left
// byte-identical; reconstruct from the repository before compiling.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
// #include "image.hpp"
#include
#include
// Include the macro for each operation
#include

// Global variables
//static size_t NCOLORS;
//static double SIGMA;
// Maximum number of variables per factor table (template arity bound).
const size_t MAX_DIM = 4;
typedef graphlab::dense_table dense_table_t;
typedef graphlab::discrete_domain domain_t;
typedef graphlab::discrete_variable variable_t;

// Bundle of command-line-tunable parameters with their defaults.
struct clopts_vals {
  clopts_vals(double bound = 1E-4, double damping = 0.3,
              std::string exec_t="sync") :
    BOUND(bound), DAMPING(damping), exec_type(exec_t) { }
  double BOUND;           // residual termination bound
  double DAMPING;         // message damping (higher = more damping)
  std::string exec_type;  // engine type: "sync" or "async"
};

int setup_cli(graphlab::command_line_options& clopts, clopts_vals& clvals,
              int argc, char** argv);

template
void run_engine(graphlab::distributed_control& dc,
                typename belief_prop::graph_type::type& graph,
                const std::string& exec_type,
                const graphlab::command_line_options& clopts);

// MAIN
// ============================================================================>
// Builds a grid-structured factor graph over the pixels of noisy_img.png
// (unary priors from the observed pixel values, pairwise Potts-style factors
// between 4-connected neighbors), runs loopy BP, writes pred_img.jpeg and
// compares against the reference output denoised_gm.png.
int main(int argc, char** argv) {
  std::cout << "This program solves the sum task."
            << std::endl;

  // Parse command line arguments --------------------------------------------->
  std::string beliefs_filename = "beliefs.txt";

  graphlab::mpi_tools::init(argc, argv);
  ///! Create a distributed control object (must come after mpi_tools::init())
  graphlab::distributed_control dc;

  graphlab::command_line_options clopts("Run Loopy BP on a Network");
  clopts_vals clvals;
  if( setup_cli(clopts, clvals, argc, argv) != EXIT_SUCCESS ) return EXIT_FAILURE;

  ///! Create a distributed graph object
  belief_prop::graph_type::type graph(dc, clopts);

  // Create the factor graph ------------------------------------------>
  std::cout << "Loading Factor Graph" << std::endl;
  belief_prop::factor_graph fgraph;

  cv::Mat_< uchar > image = cv::imread("noisy_img.png", 0); // force to grayscale with '0'
  assert( image.data != 0x0 );
  unsigned ni = image.cols;
  unsigned nj = image.rows;
  std::cout << "ni = " << ni << " nj = " << nj << std::endl;

  // get the colorspace: quantize [0,255] into n_levels labels
  unsigned n_levels = 5;
  std::cout << "pixel values = ";
  std::vector values(n_levels);
  // NOTE(review): the loop header and the value formula were garbled in
  // extraction (only "...0?1:0);" survives); restore from the repository.
  for (unsigned i=0; i0?1:0);
    std::cout << (int)values[i] << " ";
  }
  std::cout << std::endl;

  // create variables and prior factor nodes
  std::vector< std::vector< variable_t > > var_ids;
  var_ids.resize( nj, std::vector( ni, variable_t() ) );
  // NOTE(review): the variable-creation / prior-construction code that
  // defined `prior`, `idx_og`, `same_prob` and `diff_prob` was swallowed by
  // extraction; only the tail of the inner loop remains.
  for (unsigned i=0; ilinear_index() < n_levels);
        double p;
        if(asg->linear_index() == idx_og) p = same_prob;
        else                              p = diff_prob;
        // Values are stored in log form
        prior.set_logP( *asg, log(p) );
      }
    }
  }

  // create factors and connect the variables
  float neighbor_same_prob = 0.5f;
  float neighbor_diff_prob = (1.0f - neighbor_same_prob)/(n_levels - 1);
  const float cost_scale = 10.0f;
  // NOTE(review): the loop headers and the "if (j != 0)" guard for the
  // vertical-neighbor factor were garbled in extraction.
  for (unsigned i=0; i args;
        // connect vertical neighbors
        args.push_back(var_ids[ j-1 ][ i ]);
        args.push_back(var_ids[ j ][ i ]);

        // Construct the arguments (which will remap the domain)
        // std::cout << "domain: " << domain << std::endl;
        // Build the factor
        domain_t domain(args);
        dense_table_t factor(domain);

        // Set the weights
        domain_t::const_iterator end = factor.domain().end();
        for(domain_t::const_iterator asg = factor.domain().begin();
            asg != end; ++asg) {
          assert(asg->linear_index() < n_levels*n_levels);
          double err;
          if(asg->asg(var_ids[j-1][i]) == asg->asg(var_ids[j][i]))
            err = neighbor_same_prob;
          else
            err = neighbor_diff_prob;
          // Values are stored in log form
          factor.set_logP( *asg, log(err*cost_scale) );
        }
        // Save the factor to the factor graph
        fgraph.add_factor(factor);
      }
      if (i != 0) {
        // Create the 2-way factor
        std::vector args;
        // connect horizontal neighbors
        args.push_back(var_ids[ j ][ i-1 ]);
        args.push_back(var_ids[ j ][ i ]);

        // Construct the arguments (which will remap the domain)
        // std::cout << "domain: " << domain << std::endl;
        // Build the factor
        domain_t domain(args);
        dense_table_t factor(domain);

        // Set the weights
        domain_t::const_iterator end = factor.domain().end();
        for(domain_t::const_iterator asg = factor.domain().begin();
            asg != end; ++asg) {
          assert(asg->linear_index() < n_levels*n_levels);
          double err;
          if(asg->asg(var_ids[j][i-1]) == asg->asg(var_ids[j][i]))
            err = neighbor_same_prob;
          else
            err = neighbor_diff_prob;
          // Values are stored in log form
          factor.set_logP( *asg, log(err*cost_scale) );
        }
        // Save the factor to the factor graph
        fgraph.add_factor(factor);
      }
    }
  }
  // one prior per pixel plus one pairwise factor per 4-connected edge
  assert(fgraph.num_factors() == nj*ni + (nj-1)*ni + nj*(ni-1));

  const size_t num_variables = fgraph.num_variables();
  const size_t num_factors = fgraph.num_factors();
  std::cout << "num_variables: " << num_variables << " "
            << "num_factors: " << num_factors << std::endl;
  std::cout << "Finished!" << std::endl;

  // Build the BP graph from the factor graph---------------------------------->
  std::cout << "Building BP graph from the factor graph" << std::endl;
  fgraph.make_bp_graph( graph, clvals.BOUND, clvals.DAMPING );

  run_engine(dc, graph, clvals.exec_type, clopts);

  fgraph.pull_beliefs_for_variables( graph );

  // Saving the output --------------------------------------------------------->
  // NOTE: this can be done better. see loopybp_denoise.cpp
  std::cout << "Saving the predicted image" << std::endl;
  std::cout << "Collect the noisy image. " << std::endl;
  // merge_reduce_type pred_image =
  //     graph.map_reduce_vertices(pred_map_function);
  std::cout << "saving the pred image." << std::endl;
  if(dc.procid() == 0) {
    // Fill in output image----------------------------------------------------->
    // NOTE(review): the belief-to-pixel extraction inside these loops was
    // garbled in extraction; restore from the repository.
    cv::Mat_< uchar > output( nj, ni );
    for (unsigned i=0; i gm = cv::imread("denoised_gm.png", 0); // force to grayscale with '0'
    cv::Scalar err = cv::sum(cv::abs(gm - output));
    // regression check against the stored reference output
    ASSERT_LT(err(0), ni*nj*1e-3);
  }
  std::cout << "All tests passed" << std::endl;

  graphlab::mpi_tools::finalize();
  return EXIT_SUCCESS;
} // end of main


// UTILS
// ============================================================================>
// Attach the command-line options to `clvals` and parse argv; returns
// EXIT_SUCCESS/EXIT_FAILURE (finalizing MPI on failure).
int setup_cli(graphlab::command_line_options& clopts, clopts_vals& clvals,
              int argc, char** argv) {
  clopts.attach_option("bound", clvals.BOUND,
                       "Residual termination bound");
  clopts.attach_option("damping", clvals.DAMPING,
                       "The amount of message damping (higher = more damping)");
  // clopts.attach_option("beliefs", &beliefs_filename,
  //                      "The file to save the belief predictions");
  clopts.attach_option("engine", clvals.exec_type,
                       "The type of engine to use {async, sync}.");
  clopts.set_scheduler_type("fifo");

  bool success = clopts.parse(argc, argv);
  if(!success) {
    std::cout << "Error parsing command line arguments!" << std::endl;
    graphlab::mpi_tools::finalize();
    return EXIT_FAILURE;
  }
  return EXIT_SUCCESS;
}

// Create an omni_engine over the BP graph, signal every vertex, run to
// convergence, and print throughput statistics.
template
void run_engine(graphlab::distributed_control& dc,
                typename belief_prop::graph_type::type& graph,
                const std::string& exec_type,
                const graphlab::command_line_options& clopts) {
  size_t num_vertices = graph.num_vertices();
  size_t num_edges = graph.num_edges();
  std::cout << "Loaded: " << num_vertices << " vertices "
            << "and " << num_edges << " edges." << std::endl;
  std::cout << "Finished!" << std::endl;

  // Create the engine --------------------------------------------------------->
  std::cout << "Creating the engine. " << std::endl;
  // NOTE(review): the engine's vertex-program template argument was lost in
  // extraction.
  typedef graphlab::omni_engine > engine_type;
  engine_type engine(dc, graph, exec_type, clopts);

  std::cout << "Scheduling all vertices" << std::endl;
  engine.signal_all();
  std::cout << "Starting the engine" << std::endl;
  engine.start();
  const float runtime = engine.elapsed_seconds();
  size_t update_count = engine.num_updates();
  std::cout << "Finished Running engine in " << runtime
            << " seconds with " << clopts.get_ncpus() << " cpus."
            << std::endl
            << "Total updates: " << update_count << std::endl
            << "Efficiency: " << (double(update_count) / runtime)
            << " updates per second " << std::endl;
}



================================================
FILE: toolkits/graphical_models/factors/tests/test_MAD_relation/CMakeLists.txt
================================================
project(GraphLab)

add_graphlab_executable(test_MAD_relation test_MAD_relation.cpp)



================================================
FILE: toolkits/graphical_models/factors/tests/test_MAD_relation/test_MAD_relation.cpp
================================================
/**
 * Software submitted by
 * Systems & Technology Research / Vision Systems Inc., 2013
 *
 * Approved for public release; distribution is unlimited. [DISTAR Case #21428]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
* * */ /** * This file contains an example of graphlab belief propagation on * a factor node designed to constrain a MAD (multiply and add) * relation (a + b/c) = d * * \author Scott Richardson */ // INCLUDES ===================================================================> // Including Standard Libraries #include #include #include #include #include #include #include #include #include #include #include #include #include #include // Include the macro for each operation #include const size_t MAX_DIM = 4; typedef graphlab::dense_table dense_table_t; typedef graphlab::discrete_domain domain_t; typedef graphlab::discrete_variable variable_t; struct clopts_vals { clopts_vals(double bound = 1E-4, double damping = 0.0, std::string exec_t="sync") : BOUND(bound), DAMPING(damping), exec_type(exec_t) { } double BOUND; double DAMPING; std::string exec_type; }; int setup_cli(graphlab::command_line_options& clopts, clopts_vals& clvals, int argc, char** argv); template void run_engine(graphlab::distributed_control& dc, typename belief_prop::graph_type::type& graph, const std::string& exec_type, const graphlab::command_line_options& clopts); std::vector compute_labels(size_t n_labels, double max_range, double min_range); void compute_normal_dist(double mean, double std_dev, const std::vector& labels, dense_table_t& prior); // MAIN // ============================================================================> int main(int argc, char** argv) { std::cout << "This program solves the sum task." << std::endl; global_logger().set_log_level(LOG_DEBUG); graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; // must come after mpi_tools::init() // Parse command line arguments ---------------------------------------------> graphlab::command_line_options clopts("Run Loopy BP on a Network"); clopts_vals clvals; if( setup_cli(clopts, clvals, argc, argv) != EXIT_SUCCESS ) return EXIT_FAILURE; ///! 
Create a distributed control object belief_prop::graph_type::type graph(dc, clopts); // Create the factor graph ------------------------------------------> std::cout << "Loading Factor Graph" << std::endl; belief_prop::factor_graph fgraph; // Create the variables size_t n_labels = 20; double cost_scale = 100; double min_range = 0.0; double max_range = 3.0; std::vector labels = compute_labels(n_labels, max_range, min_range); variable_t var_a = fgraph.add_variable(n_labels, "var_a"); { double mean = 2.5; double std_dev = 0.1; dense_table_t& prior = fgraph.prior_for_variable(var_a); compute_normal_dist(mean, std_dev, labels, prior); std::cout << "var_a_prior=" << prior << std::endl; } variable_t var_b = fgraph.add_variable(n_labels, "var_b"); { double mean = 1.1; double std_dev = 0.1; dense_table_t& prior = fgraph.prior_for_variable(var_b); compute_normal_dist(mean, std_dev, labels, prior); std::cout << "var_b_prior=" << prior << std::endl; } // scale float c_scale = -0.5; std::vector d_labels = compute_labels(2*n_labels, max_range, min_range); // REVIEW this should probably have n_labels*2 variable_t var_d = fgraph.add_variable(2*n_labels, "var_d"); { dense_table_t& prior = fgraph.prior_for_variable(var_d); prior.zero(); std::cout << "var_d_prior=" << prior << std::endl; } // Create a factor std::vector args; // connect vertical neighbors args.push_back(var_a); args.push_back(var_b); args.push_back(var_d); // Build the factor dense_table_t mad(args); // Set the weights domain_t::const_iterator end = mad.domain().end(); for(domain_t::const_iterator asg = mad.domain().begin(); asg != end; ++asg) { double a = labels[asg->asg(var_a)]; double b = labels[asg->asg(var_b)]; double c = c_scale; double d = d_labels[asg->asg(var_d)]; double d_prime = (a + c*b); double err = d_prime - d; //std::cout << "a = " << a << " b = " << b << " d = " << d << " err = " << err << std::endl; mad.set_logP( *asg, -1*(cost_scale*err*err) ); } // Save the factor to the factor graph 
fgraph.add_factor(mad, "mad"); const size_t num_variables = fgraph.num_variables(); const size_t num_factors = fgraph.num_factors(); std::cout << "num_variables: " << num_variables << " num_factors: " << num_factors << std::endl; std::cout << "Finished!" << std::endl; // Build the BP graph from the factor graph----------------------------------> std::cout << "Building BP graph from the factor graph" << std::endl; fgraph.make_bp_graph( graph, clvals.BOUND, clvals.DAMPING ); run_engine(dc, graph, clvals.exec_type, clopts); fgraph.pull_beliefs_for_variables( graph ); // Saving the output --------------------------------------------------------> //fgraph.print_variable(var_a, labels); //fgraph.print_variable(var_b, labels); //fgraph.print_variable(var_d, d_labels); double a = labels[fgraph.belief_for_variable(var_a).max_index()]; double b = labels[fgraph.belief_for_variable(var_b).max_index()]; double d = d_labels[fgraph.belief_for_variable(var_d).max_index()]; std::cout << "var_a: " << a << std::endl; std::cout << "var_b: " << b << std::endl; std::cout << "var_d: " << d << std::endl; double d_prime = a + (c_scale * b); double err = std::abs(d_prime - d); std::cout << "d: " << d << " d_prime: " << d_prime << " err: " << err << std::endl; ASSERT_LT(err, (max_range - min_range)/n_labels); std::cout << "All tests passed" << std::endl; } // end of main // UTILS // ============================================================================> int setup_cli(graphlab::command_line_options& clopts, clopts_vals& clvals, int argc, char** argv) { clopts.attach_option("bound", clvals.BOUND, "Residual termination bound"); clopts.attach_option("damping", clvals.DAMPING, "The amount of message damping (higher = more damping)"); // clopts.attach_option("beliefs", &beliefs_filename, // "The file to save the belief predictions"); clopts.attach_option("engine", clvals.exec_type, "The type of engine to use {async, sync}."); clopts.set_scheduler_type("fifo"); bool success = 
clopts.parse(argc, argv); if(!success) { std::cout << "Error parsing command line arguments!" << std::endl; graphlab::mpi_tools::finalize(); return EXIT_FAILURE; } return EXIT_SUCCESS; } template void run_engine(graphlab::distributed_control& dc, typename belief_prop::graph_type::type& graph, const std::string& exec_type, const graphlab::command_line_options& clopts) { size_t num_vertices = graph.num_vertices(); size_t num_edges = graph.num_edges(); std::cout << "Loaded: " << num_vertices << " vertices " << "and " << num_edges << " edges." << std::endl; std::cout << "Finished!" << std::endl; // Create the engine --------------------------------------------------------> std::cout << "Creating the engine. " << std::endl; typedef graphlab::omni_engine > engine_type; engine_type engine(dc, graph, exec_type, clopts); std::cout << "Scheduling all vertices" << std::endl; engine.signal_all(); std::cout << "Starting the engine" << std::endl; engine.start(); const float runtime = engine.elapsed_seconds(); size_t update_count = engine.num_updates(); std::cout << "Finished Running engine in " << runtime << " seconds." 
<< std::endl << "Total updates: " << update_count << std::endl << "Efficiency: " << (double(update_count) / runtime) << " updates per second " << std::endl; } std::vector compute_labels(size_t n_labels, double max_range, double min_range) { std::vector labels(n_labels, 0.0); double step = (max_range - min_range)/(n_labels-1); for(unsigned i = 0; i < n_labels; ++i) { labels[i] = min_range + i*step; } return labels; } void compute_normal_dist(double mean, double std_dev, const std::vector& labels, dense_table_t& prior) { domain_t::const_iterator asg = prior.domain().begin(); domain_t::const_iterator end = prior.domain().end(); std::vector::const_iterator label = labels.begin(); for( ; asg != end; ++asg, ++label) { double nv = (*label-mean)/std_dev; prior.set_logP( *asg, -1*nv*nv ); } } ================================================ FILE: toolkits/graphical_models/factors/tests/test_bool_var/CMakeLists.txt ================================================ project(GraphLab) add_graphlab_executable(test_bool_var test_bool_var.cpp) add_graphlab_executable(test_cat_bool_joint test_cat_bool_joint.cpp) ================================================ FILE: toolkits/graphical_models/factors/tests/test_bool_var/test_bool_var.cpp ================================================ /** * Software submitted by * Systems & Technology Research / Vision Systems Inc., 2013 * * Approved for public release; distribution is unlimited. [DISTAR Case #21428] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ /** * This file contains an example of graphlab belief propagation on * a factor graph designed to test a unary factor node. * * \author Scott Richardson */ // INCLUDES ===================================================================> // Including Standard Libraries #include #include #include #include #include #include #include #include #include #include #include #include #include #include // Include the macro for each operation #include const size_t MAX_DIM = 4; typedef graphlab::dense_table dense_table_t; typedef graphlab::discrete_variable variable_t; struct clopts_vals { clopts_vals(double bound = 1E-4, double damping = 0.0, std::string exec_t="sync") : BOUND(bound), DAMPING(damping), exec_type(exec_t) { } double BOUND; double DAMPING; std::string exec_type; }; int setup_cli(graphlab::command_line_options& clopts, clopts_vals& clvals, int argc, char** argv); template void run_engine(graphlab::distributed_control& dc, typename belief_prop::graph_type::type& graph, const std::string& exec_type, const graphlab::command_line_options& clopts); // MAIN // ============================================================================> int main(int argc, char** argv) { std::cout << "This program solves the sum task." << std::endl; graphlab::mpi_tools::init(argc, argv); ///! Create a distributed control object (must come after mpi_tools::init()) graphlab::distributed_control dc; // Parse command line arguments ---------------------------------------------> graphlab::command_line_options clopts("Run Loopy BP on a Network"); clopts_vals clvals; if( setup_cli(clopts, clvals, argc, argv) != EXIT_SUCCESS ) return EXIT_FAILURE; ///! 
Create a distributed graph object belief_prop::graph_type::type graph(dc, clopts); // Create the factor graph ------------------------------------------> std::cout << "Loading Factor Graph" << std::endl; belief_prop::factor_graph fgraph; // Create the variable size_t nlabels = 2; variable_t bool_var_b = fgraph.add_variable(nlabels, "bool_var_b"); dense_table_t& prior = fgraph.prior_for_variable(bool_var_b); prior.zero(); // Set the weights std::vector logf(2); logf[0] = std::log(0.6); logf[1] = std::log(0.4); // Build a unary factor dense_table_t bool_obs( bool_var_b, logf ); // Save the factor to the factor graph fgraph.add_factor(bool_obs, "bool_obs"); const size_t num_variables = fgraph.num_variables(); const size_t num_factors = fgraph.num_factors(); std::cout << "num_variables: " << num_variables << " num_factors: " << num_factors << std::endl; std::cout << "Finished!" << std::endl; // Build the BP graph from the factor graph----------------------------------> std::cout << "Building BP graph from the factor graph" << std::endl; fgraph.make_bp_graph( graph, clvals.BOUND, clvals.DAMPING ); run_engine(dc, graph, clvals.exec_type, clopts); fgraph.pull_beliefs_for_variables( graph ); // Saving the output --------------------------------------------------------> std::cout << fgraph.belief_for_variable(bool_var_b) << std::endl; double bobs = fgraph.belief_for_variable(bool_var_b).logP(1); double err = abs(bobs - .405465); ASSERT_LT(err, .01); std::cout << "All tests passed" << std::endl; } // end of main // UTILS // ============================================================================> int setup_cli(graphlab::command_line_options& clopts, clopts_vals& clvals, int argc, char** argv) { clopts.attach_option("bound", clvals.BOUND, "Residual termination bound"); clopts.attach_option("damping", clvals.DAMPING, "The amount of message damping (higher = more damping)"); // clopts.attach_option("beliefs", &beliefs_filename, // "The file to save the belief predictions"); 
clopts.attach_option("engine", clvals.exec_type, "The type of engine to use {async, sync}."); clopts.set_scheduler_type("fifo"); bool success = clopts.parse(argc, argv); if(!success) { std::cout << "Error parsing command line arguments!" << std::endl; graphlab::mpi_tools::finalize(); return EXIT_FAILURE; } return EXIT_SUCCESS; } template void run_engine(graphlab::distributed_control& dc, typename belief_prop::graph_type::type& graph, const std::string& exec_type, const graphlab::command_line_options& clopts) { size_t num_vertices = graph.num_vertices(); size_t num_edges = graph.num_edges(); std::cout << "Loaded: " << num_vertices << " vertices " << "and " << num_edges << " edges." << std::endl; std::cout << "Finished!" << std::endl; // Create the engine --------------------------------------------------------> std::cout << "Creating the engine. " << std::endl; typedef graphlab::omni_engine > engine_type; engine_type engine(dc, graph, exec_type, clopts); std::cout << "Scheduling all vertices" << std::endl; engine.signal_all(); std::cout << "Starting the engine" << std::endl; engine.start(); const float runtime = engine.elapsed_seconds(); size_t update_count = engine.num_updates(); std::cout << "Finished Running engine in " << runtime << " seconds." << std::endl << "Total updates: " << update_count << std::endl << "Efficiency: " << (double(update_count) / runtime) << " updates per second " << std::endl; } ================================================ FILE: toolkits/graphical_models/factors/tests/test_bool_var/test_cat_bool_joint.cpp ================================================ /** * Software submitted by * Systems & Technology Research / Vision Systems Inc., 2013 * * Approved for public release; distribution is unlimited. [DISTAR Case #21428] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * This file contains an example of graphlab belief propagation on * a factor graph designed to ignore false positives. * * \author Scott Richardson */ // INCLUDES ===================================================================> // Including Standard Libraries #include #include #include #include #include #include #include #include #include #include #include // annoyingly, this must be done before graphlab/logger/logger.hpp is called #define OUTPUTLEVEL LOG_EMPH #include #include #include // Include the macro for each operation #include const size_t MAX_DIM = 4; typedef graphlab::dense_table dense_table_t; typedef graphlab::discrete_variable variable_t; struct clopts_vals { clopts_vals(double bound = 1E-4, double damping = 0.0, std::string exec_type="sync", int verbose = LOG_EMPH) : BOUND(bound), DAMPING(damping), EXEC_TYPE(exec_type), VERBOSE(verbose) { } double BOUND; double DAMPING; std::string EXEC_TYPE; int VERBOSE; }; int setup_cli(graphlab::command_line_options& clopts, clopts_vals& clvals, int argc, char** argv); template void run_engine(graphlab::distributed_control& dc, typename belief_prop::graph_type::type& graph, const std::string& exec_type, const graphlab::command_line_options& clopts); // MAIN // ============================================================================> int main(int argc, char** argv) { std::cout << "This program solves the sum task." << std::endl; graphlab::mpi_tools::init(argc, argv); ///! 
Create a distributed control object (must come after mpi_tools::init()) graphlab::distributed_control dc; // Parse command line arguments ---------------------------------------------> graphlab::command_line_options clopts("Run Loopy BP on a Network"); clopts_vals clvals; if( setup_cli(clopts, clvals, argc, argv) != EXIT_SUCCESS ) return EXIT_FAILURE; ///! Create a distributed graph object belief_prop::graph_type::type graph(dc, clopts); // Create the factor graph ------------------------------------------> std::cout << "Loading Factor Graph" << std::endl; belief_prop::factor_graph fgraph; // Create the variables size_t nlabels = 2; variable_t foo = fgraph.add_variable(nlabels, "foo"); std::vector logf(nlabels, 0.0); logf[0] = -1.0; fgraph.set_prior_for_variable(foo, logf); variable_t bool_var_b = fgraph.add_variable(nlabels, "bool_var_b"); dense_table_t& bool_var_b_prior = fgraph.prior_for_variable(bool_var_b); bool_var_b_prior.zero(); //std::vector logb(nlabels, std::log(0.5)); //fgraph.set_prior_for_variable(bool_var_b, logb); // add joint nlog belief values // cat/fp-tp| false | true | // ----------|--------|------| // foo | 0.1 | 0.9 | // ----------|--------|------| // false pos | 0.8 | 0.2 | // --------------------------- // // Create a factor std::vector args; // connect vertical neighbors args.push_back(foo); args.push_back(bool_var_b); // Set the weights std::vector logc(nlabels*nlabels); logc[0] = std::log(0.1); logc[2] = std::log(0.9); logc[1] = std::log(0.8); logc[3] = std::log(0.2); // Build the factor dense_table_t cbj(args, logc); // Save the factor to the factor graph fgraph.add_factor(cbj, "cbj"); // Build the unary factor logf[0] = std::log(0.1); logf[1] = std::log(0.9); dense_table_t bool_obs(bool_var_b, logf); // Save the factor to the factor graph fgraph.add_factor(bool_obs, "bool_obs"); const size_t num_variables = fgraph.num_variables(); const size_t num_factors = fgraph.num_factors(); std::cout << "num_variables: " << num_variables << " 
num_factors: " << num_factors << std::endl; std::cout << "Finished!" << std::endl; // Build the BP graph from the factor graph----------------------------------> std::cout << "Building BP graph from the factor graph" << std::endl; fgraph.make_bp_graph( graph, clvals.BOUND, clvals.DAMPING ); run_engine(dc, graph, clvals.EXEC_TYPE, clopts); fgraph.pull_beliefs_for_variables( graph ); // Saving the output --------------------------------------------------------> double bobs_t = fgraph.belief_for_variable(bool_var_b).logP(1); double bobs_f = fgraph.belief_for_variable(bool_var_b).logP(0); double p_true = std::exp(bobs_t) / (std::exp(bobs_t) + std::exp(bobs_f)); std::cout << "p_true = " << p_true << std::endl; double err = abs(p_true - .788); ASSERT_LT(err, .01); // std::cout << fgraph.belief(graph, bool_var_b.id()). << std::endl; // std::cout << fgraph.belief(graph, foo.id()). << std::endl; std::cout << "All tests passed" << std::endl; } // end of main // UTILS // ============================================================================> int setup_cli(graphlab::command_line_options& clopts, clopts_vals& opts, int argc, char** argv) { clopts.attach_option("bound", opts.BOUND, "Residual termination bound"); clopts.attach_option("damping", opts.DAMPING, "The amount of message damping (higher = more damping)"); clopts.attach_option("verbose", opts.VERBOSE, "Verbosity of Printing: 0 (lots), 2 (default), 6 (no printing)."); // clopts.attach_option("beliefs", &beliefs_filename, // "The file to save the belief predictions"); clopts.attach_option("engine", opts.EXEC_TYPE, "The type of engine to use {async, sync}."); clopts.set_scheduler_type("fifo"); bool success = clopts.parse(argc, argv); if(!success) { std::cout << "Error parsing command line arguments!" 
<< std::endl; graphlab::mpi_tools::finalize(); return EXIT_FAILURE; } std::cout << "logging level: " << std::max(opts.VERBOSE, OUTPUTLEVEL) << std::endl; global_logger().set_log_level(opts.VERBOSE); return EXIT_SUCCESS; } template void run_engine(graphlab::distributed_control& dc, typename belief_prop::graph_type::type& graph, const std::string& exec_type, const graphlab::command_line_options& clopts) { size_t num_vertices = graph.num_vertices(); size_t num_edges = graph.num_edges(); std::cout << "Loaded: " << num_vertices << " vertices " << "and " << num_edges << " edges." << std::endl; std::cout << "Finished!" << std::endl; // Create the engine --------------------------------------------------------> std::cout << "Creating the engine. " << std::endl; typedef graphlab::omni_engine > engine_type; engine_type engine(dc, graph, exec_type, clopts); std::cout << "Scheduling all vertices" << std::endl; engine.signal_all(); std::cout << "Starting the engine" << std::endl; engine.start(); const float runtime = engine.elapsed_seconds(); size_t update_count = engine.num_updates(); std::cout << "Finished Running engine in " << runtime << " seconds." << std::endl << "Total updates: " << update_count << std::endl << "Efficiency: " << (double(update_count) / runtime) << " updates per second " << std::endl; } ================================================ FILE: toolkits/graphical_models/factors/tests/test_dense_table/CMakeLists.txt ================================================ project(GraphLab) add_graphlab_executable(test_dense_table test_dense_table.cpp) ================================================ FILE: toolkits/graphical_models/factors/tests/test_dense_table/test_dense_table.cpp ================================================ /** * Software submitted by * Systems & Technology Research / Vision Systems Inc., 2013 * * Approved for public release; distribution is unlimited. 
[DISTAR Case #21428] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * Test various functions of a dense table * * \author Scott Richardson */ #include #include #include #include #include #include #include #include #include #include const size_t MAX_DIM = 4; typedef graphlab::dense_table dense_table_t; typedef graphlab::discrete_domain domain_t; typedef graphlab::discrete_assignment assignment_t; typedef graphlab::discrete_variable variable_t; dense_table_t create_rand_dense_table(unsigned v0_id, unsigned v1_id, unsigned v2_id) { variable_t v0(v0_id, 4); variable_t v1(v1_id, 3); variable_t v2(v2_id, 2); std::vector vars; vars.push_back(v0); vars.push_back(v1); vars.push_back(v2); domain_t domain(vars); dense_table_t dt(domain); assignment_t da(domain); for(int i=0; i < domain.var(0).size(); ++i) { da.set_asg(v0, i); for(int j=0; j < domain.var(1).size(); ++j) { da.set_asg(v1, j); for(int k=0; k < domain.var(2).size(); ++k) { da.set_asg(v2, k); if(rand() % 100 <= 20) { dt.set_logP(da, -1 * (rand() % 100)); } } } } return dt; } std::vector create_rand_data_vector(size_t d0, size_t d1, size_t d2) { std::vector v(d0*d1*d2); for(size_t i=0; i < v.size(); ++i) { v[i] = -1 * (rand() % 100); } return v; } // [-Inf -Inf -3 -Inf] // [-15 -4 -Inf -23 ] // [-Inf -Inf -Inf -Inf] // // [-Inf -Inf -Inf -Inf] // [-20 -12 -19 -78 ] // [-Inf -Inf -Inf -32 ] void create_data_vector(std::vector &data) { data.resize(4*3*2); data[0] = -1000; data[1] = -1000; data[2] = -3; data[3] = -1000; 
data[4] = -15; data[5] = -4; data[6] = -1000; data[7] = -23; data[8] = -1000; data[9] = -1000; data[10] = -1000; data[11] = -1000; data[12] = -1000; data[13] = -1000; data[14] = -1000; data[15] = -1000; data[16] = -20; data[17] = -12; data[18] = -19; data[19] = -78; data[20] = -1000; data[21] = -1000; data[22] = -1000; data[23] = -32; } void testDataReorder(unsigned v0_id, unsigned v1_id, unsigned v2_id) { // setup the data as if its from belief prop std::vector data = create_rand_data_vector(4, 4, 4); // create a table with variables that are not resorted variable_t v0(0, 4); variable_t v1(1, 4); variable_t v2(2, 4); std::vector vars; vars.push_back(v0); vars.push_back(v1); vars.push_back(v2); dense_table_t dt_gm(vars, data); //std::cout << "dt_gm " << dt_gm << std::endl; //std::cout << "v0_id " << v0_id << " v1_id " << v1_id << " v2_id " << v2_id << std::endl; variable_t r0(v0_id, 4); variable_t r1(v1_id, 4); variable_t r2(v2_id, 4); std::vector reordered_vars; reordered_vars.push_back(r0); reordered_vars.push_back(r1); reordered_vars.push_back(r2); dense_table_t dt(reordered_vars, data); //std::cout << "dt " << dt << std::endl; for(int i=0; i < dt.numel(); ++i) { assignment_t da_gm(domain_t(vars), i); // permute assignment std::vector asgs(da_gm.begin(), da_gm.end()); assignment_t da(reordered_vars, asgs); //std::cout << "dt_gm{" << da_gm << "}=" << dt_gm.logP(da_gm) << "; " // << "dt{" << da << "}=" << dt.logP(da) << std::endl; ASSERT_TRUE(dt_gm.logP(da_gm) == dt.logP(da)); } } dense_table_t create_dense_table(unsigned v0_id, unsigned v1_id, unsigned v2_id) { // create a table with dimensions ordered like the original table (e.g., from // belief prop) to make computing the linear index easier std::vector data; create_data_vector(data); variable_t v0(v0_id, 4); variable_t v1(v1_id, 3); variable_t v2(v2_id, 2); std::vector vars; vars.push_back(v0); vars.push_back(v1); vars.push_back(v2); dense_table_t dt(vars, data); return dt; } void multiplyTest(unsigned v0_id, 
unsigned v1_id, unsigned v2_id) { dense_table_t dt = create_dense_table(v0_id, v1_id, v2_id); dense_table_t dt_gm = dt; //std::cout << "dt " << dt << std::endl; dt *= dt; //std::cout << "dt *= dt " << dt << std::endl; { domain_t::const_iterator asg = dt.domain().begin(); domain_t::const_iterator end = dt.domain().end(); for( ; asg != end; ++asg) ASSERT_EQ(dt.logP(*asg), dt_gm.logP(*asg)+dt_gm.logP(*asg)); } dt = dt_gm; unsigned msg_length = dt.var(dt.domain().var_location(v0_id)).size(); variable_t v0(v0_id, msg_length); dense_table_t msg(v0); domain_t::const_iterator asg = msg.domain().begin(); domain_t::const_iterator end = msg.domain().end(); for( ; asg != end; ++asg) { msg.set_logP( *asg, -1*(rand() % 100) ); } dt *= msg; //std::cout << "msg = " << msg << std::endl; //std::cout << "dt *= msg " << dt << std::endl; for(size_t i=0; i < dt.size(); ++i) { assignment_t dt_asg(dt.domain(), i); assignment_t msg_asg = dt_asg.restrict(msg.domain()); ASSERT_EQ(dt.logP(dt_asg), dt_gm.logP(dt_asg)+msg.logP(msg_asg)); } } int main() { // create a table dense_table_t dt_gm = create_dense_table(2, 0, 1); //std::cout << "dt_gm " << dt_gm << std::endl; double err; // equals test dense_table_t dt = create_dense_table(2, 0, 1); //std::cout << "dt " << dt << std::endl; err = dt.l1_diff(dt_gm); //std::cout << "err: " << err << std::endl; ASSERT_LT(err, 1e-4); // copy test dense_table_t dt_copy; dt_copy = dt; err = dt.l1_diff(dt_copy); //std::cout << "err: " << err << std::endl; ASSERT_LT(err, 1e-4); // test dense table data reorder testDataReorder(2, 3, 4); testDataReorder(2, 4, 3); testDataReorder(3, 2, 4); testDataReorder(3, 4, 2); testDataReorder(4, 2, 3); testDataReorder(4, 3, 2); // multiply test - compare a (pre-ordered) dense table to the dense table multiplyTest(2, 3, 4); multiplyTest(2, 4, 3); multiplyTest(3, 2, 4); multiplyTest(3, 4, 2); multiplyTest(4, 2, 3); multiplyTest(4, 3, 2); std::cout << "All tests passed" << std::endl; } 
================================================ FILE: toolkits/graphical_models/factors/tests/test_sparse_table/CMakeLists.txt ================================================ project(GraphLab) add_graphlab_executable(test_sparse_table test_sparse_table.cpp) add_graphlab_executable(test_neg_relation test_neg_relation.cpp) ================================================ FILE: toolkits/graphical_models/factors/tests/test_sparse_table/test_neg_relation.cpp ================================================ /** * Software submitted by * Systems & Technology Research / Vision Systems Inc., 2013 * * Approved for public release; distribution is unlimited. [DISTAR Case #21428] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ /** * This file contains an example of graphlab belief propagation on * a factor node to constrain a negation relation a = -b * * \author Scott Richardson */ // INCLUDES ===================================================================> // Including Standard Libraries #include #include #include #include #include #include #include #include #include #include #include #include #include #include // Include the macro for each operation #include const size_t MAX_DIM = 4; typedef graphlab::dense_table dense_table_t; typedef graphlab::sparse_table sparse_table_t; typedef graphlab::discrete_assignment assignment_t; typedef graphlab::discrete_domain domain_t; typedef graphlab::discrete_variable variable_t; struct clopts_vals { clopts_vals(double bound = 1E-4, double damping = 0.0, std::string exec_t="sync") : BOUND(bound), DAMPING(damping), exec_type(exec_t) { } double BOUND; double DAMPING; std::string exec_type; }; int setup_cli(graphlab::command_line_options& clopts, clopts_vals& clvals, int argc, char** argv); template void run_engine(graphlab::distributed_control& dc, typename belief_prop::graph_type::type& graph, const std::string& exec_type, const graphlab::command_line_options& clopts); std::vector compute_labels(size_t n_labels, double max_range, double min_range); void compute_normal_dist(double mean, double std_dev, const std::vector& labels, dense_table_t& prior); // MAIN // ============================================================================> int main(int argc, char** argv) { std::cout << "This program solves the sum task." << std::endl; global_logger().set_log_level(LOG_DEBUG); graphlab::mpi_tools::init(argc, argv); ///! 
Create a distributed control object (must come after mpi_tools::init()) graphlab::distributed_control dc; // Parse command line arguments ---------------------------------------------> graphlab::command_line_options clopts("Run Loopy BP on a Network"); clopts_vals clvals; if( setup_cli(clopts, clvals, argc, argv) != EXIT_SUCCESS ) return EXIT_FAILURE; ///! Create a distributed graph object belief_prop::graph_type::type graph(dc, clopts); // Create the factor graph ------------------------------------------> std::cout << "Loading Factor Graph" << std::endl; belief_prop::factor_graph fgraph; // Create the variables size_t n_labels = 10; double cost_scale = 100; double min_range = -3.0; double max_range = 3.0; std::vector labels = compute_labels(n_labels, max_range, min_range); variable_t var_a = fgraph.add_variable(n_labels, "var_a"); { double mean = 2.3; double std_dev = 0.1; dense_table_t& prior = fgraph.prior_for_variable(var_a); compute_normal_dist(mean, std_dev, labels, prior); std::cout << "var_a_prior=" << prior << std::endl; } variable_t var_b = fgraph.add_variable(n_labels, "var_b"); { dense_table_t& prior = fgraph.prior_for_variable(var_b); prior.zero(); std::cout << "var_b_prior=" << prior << std::endl; } // Create a factor std::vector args; // connect vertical neighbors args.push_back(var_a); args.push_back(var_b); // Build the factor domain_t dom(args); sparse_table_t neg(dom); // Set the weights assignment_t asg(dom); for(size_t i=0; i std::cout << "Building BP graph from the factor graph" << std::endl; fgraph.make_bp_graph( graph, clvals.BOUND, clvals.DAMPING ); run_engine(dc, graph, clvals.exec_type, clopts); fgraph.pull_beliefs_for_variables( graph ); // Saving the output --------------------------------------------------------> fgraph.print_variable(var_a, labels); fgraph.print_variable(var_b, labels); double a = labels[fgraph.belief_for_variable(var_a).max_index()]; double b = labels[fgraph.belief_for_variable(var_b).max_index()]; std::cout << 
"var_a: " << a << std::endl; std::cout << "var_b: " << b << std::endl; double b_prime = -1*a; double err = std::abs(b - b_prime); std::cout << "b: " << b << " b_prime: " << b_prime << " err: " << err << std::endl; ASSERT_LT(err, 1E-4); std::cout << "All tests passed" << std::endl; } // end of main // UTILS // ============================================================================> int setup_cli(graphlab::command_line_options& clopts, clopts_vals& clvals, int argc, char** argv) { clopts.attach_option("bound", clvals.BOUND, "Residual termination bound"); clopts.attach_option("damping", clvals.DAMPING, "The amount of message damping (higher = more damping)"); // clopts.attach_option("beliefs", &beliefs_filename, // "The file to save the belief predictions"); clopts.attach_option("engine", clvals.exec_type, "The type of engine to use {async, sync}."); clopts.set_scheduler_type("fifo"); bool success = clopts.parse(argc, argv); if(!success) { std::cout << "Error parsing command line arguments!" << std::endl; graphlab::mpi_tools::finalize(); return EXIT_FAILURE; } return EXIT_SUCCESS; } template void run_engine(graphlab::distributed_control& dc, typename belief_prop::graph_type::type& graph, const std::string& exec_type, const graphlab::command_line_options& clopts) { size_t num_vertices = graph.num_vertices(); size_t num_edges = graph.num_edges(); std::cout << "Loaded: " << num_vertices << " vertices " << "and " << num_edges << " edges." << std::endl; std::cout << "Finished!" << std::endl; // Create the engine --------------------------------------------------------> std::cout << "Creating the engine. 
" << std::endl; typedef graphlab::omni_engine > engine_type; engine_type engine(dc, graph, exec_type, clopts); std::cout << "Scheduling all vertices" << std::endl; engine.signal_all(); std::cout << "Starting the engine" << std::endl; engine.start(); const float runtime = engine.elapsed_seconds(); size_t update_count = engine.num_updates(); std::cout << "Finished Running engine in " << runtime << " seconds." << std::endl << "Total updates: " << update_count << std::endl << "Efficiency: " << (double(update_count) / runtime) << " updates per second " << std::endl; } std::vector compute_labels(size_t n_labels, double max_range, double min_range) { std::vector labels(n_labels, 0.0); double step = (max_range - min_range)/(n_labels-1); for(unsigned i = 0; i < n_labels; ++i) { labels[i] = min_range + i*step; } return labels; } void compute_normal_dist(double mean, double std_dev, const std::vector& labels, dense_table_t& prior) { domain_t::const_iterator asg = prior.domain().begin(); domain_t::const_iterator end = prior.domain().end(); std::vector::const_iterator label = labels.begin(); for( ; asg != end; ++asg, ++label) { double nv = (*label-mean)/std_dev; prior.set_logP( *asg, -1*nv*nv ); } } ================================================ FILE: toolkits/graphical_models/factors/tests/test_sparse_table/test_sparse_table.cpp ================================================ /** * Software submitted by * Systems & Technology Research / Vision Systems Inc., 2013 * * Approved for public release; distribution is unlimited. [DISTAR Case #21428] * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. 
See the License for the specific language * governing permissions and limitations under the License. * * */ /** * Test various functions of a sparse table * * \author Scott Richardson */ #include #include #include #include #include #include #include #include #include #include #include const size_t MAX_DIM = 4; typedef graphlab::dense_table dense_table_t; typedef graphlab::sparse_table sparse_table_t; typedef graphlab::discrete_assignment assignment_t; typedef graphlab::discrete_domain domain_t; typedef graphlab::discrete_variable variable_t; sparse_table_t create_rand_sparse_table(unsigned v0_id, unsigned v1_id, unsigned v2_id) { variable_t v0(v0_id, 4); variable_t v1(v1_id, 3); variable_t v2(v2_id, 2); std::vector args; args.push_back(v0); args.push_back(v1); args.push_back(v2); domain_t dom(args); sparse_table_t st(dom); domain_t::const_iterator asg_it = dom.begin(); domain_t::const_iterator end = dom.end(); for( ; asg_it != end; ++asg_it) { if(rand() % 100 <= 20) { st.set_logP( *asg_it, -1 * (rand() % 100) ); } } return st; } // [-Inf -Inf -3 -Inf] // [-15 -4 -Inf -23 ] // [-Inf -Inf -Inf -Inf] // // [-Inf -Inf -Inf -Inf] // [-20 -12 -19 -78 ] // [-Inf -Inf -Inf -32 ] void create_data_vector(std::vector > &data, const domain_t &dom) { { size_t sa[] = {2,1,1}; assignment_t da(dom, std::vector(sa, sa+3)); data.push_back(std::make_pair(da.linear_index(), -19)); } { size_t sa[] = {3,2,1}; assignment_t da(dom, std::vector(sa, sa+3)); data.push_back(std::make_pair(da.linear_index(), -32)); } { size_t sa[] = {2,0,0}; assignment_t da(dom, std::vector(sa, sa+3)); data.push_back(std::make_pair(da.linear_index(), -3)); } { size_t sa[] = {3,1,0}; assignment_t da(dom, std::vector(sa, sa+3)); data.push_back(std::make_pair(da.linear_index(), -23)); } { size_t sa[] = {3,1,1}; assignment_t da(dom, std::vector(sa, sa+3)); data.push_back(std::make_pair(da.linear_index(), -78)); } { size_t sa[] = {1,1,1}; assignment_t da(dom, std::vector(sa, sa+3)); 
data.push_back(std::make_pair(da.linear_index(), -12)); } { size_t sa[] = {1,1,0}; assignment_t da(dom, std::vector(sa, sa+3)); data.push_back(std::make_pair(da.linear_index(), -4)); } { size_t sa[] = {0,1,0}; assignment_t da(dom, std::vector(sa, sa+3)); data.push_back(std::make_pair(da.linear_index(), -15)); } { size_t sa[] = {0,1,1}; assignment_t da(dom, std::vector(sa, sa+3)); data.push_back(std::make_pair(da.linear_index(), -20)); } } void testDataReorder(unsigned v0_id, unsigned v1_id, unsigned v2_id) { assert(v0_id != v1_id && v1_id != v2_id && v0_id != v2_id); // setup the data as if its from belief prop std::vector > data; variable_t v0(0, 4); variable_t v1(1, 3); variable_t v2(2, 2); std::vector args; args.push_back(v0); args.push_back(v1); args.push_back(v2); domain_t dom(args); create_data_vector(data, dom); // create a table with variables that are not resorted sparse_table_t st_gm(args, data); //std::cout << "st_gm " << st_gm << std::endl; variable_t r0(v0_id, 4); variable_t r1(v1_id, 3); variable_t r2(v2_id, 2); std::vector reordered_args; reordered_args.push_back(r0); reordered_args.push_back(r1); reordered_args.push_back(r2); sparse_table_t st(reordered_args, data); //std::cout << "st " << st << std::endl; assignment_t asg_gm(st_gm.domain()); domain_t::const_iterator asg_it = dom.begin(); domain_t::const_iterator end = dom.begin(); for( ; asg_it != end; ++asg_it) { asg_gm.set_asg(v0, asg_it->asg(r0)); asg_gm.set_asg(v1, asg_it->asg(r1)); asg_gm.set_asg(v2, asg_it->asg(r2)); //std::cout << "st_gm{" << asg_gm << "}=" << st_gm.logP(asg_gm) << "; " // << "st{" << asg_it << "}=" << st.logP(asg_it) << std::endl; ASSERT_TRUE(st_gm.logP(asg_gm) == st.logP(*asg_it)); } } sparse_table_t create_sparse_table(unsigned v0_id, unsigned v1_id, unsigned v2_id) { // create a table with dimensions ordered like the original table (e.g., from // belief prop) to make computing the linear index easier std::vector > data; { variable_t v0(0, 4); variable_t v1(1, 3); 
variable_t v2(2, 2); std::vector args; args.push_back(v0); args.push_back(v1); args.push_back(v2); domain_t dom(args); create_data_vector(data, dom); } variable_t v0(v0_id, 4); variable_t v1(v1_id, 3); variable_t v2(v2_id, 2); std::vector args; args.push_back(v0); args.push_back(v1); args.push_back(v2); sparse_table_t st(args, data); return st; } void multiplyTest(unsigned v0_id, unsigned v1_id, unsigned v2_id, dense_table_t& dt) { dense_table_t dt_gm = dt; sparse_table_t st = create_sparse_table(v0_id, v1_id, v2_id); sparse_table_t st_gm = st; //std::cout << "st " << st << std::endl; dt *= dt; st *= st; //std::cout << "st *= st " << st << std::endl; { dense_table_t st_as_dt(st.domain()); st.as_dense_table(st_as_dt); double err = dt.l1_diff(st_as_dt); //std::cout << "err: " << err << std::endl; ASSERT_LT(err, 1e-4); } dt = dt_gm; st = st_gm; unsigned msg_length = st.var(st.domain().var_location(v0_id)).size(); variable_t v0(v0_id, msg_length); dense_table_t msg(v0); domain_t::const_iterator asg = msg.domain().begin(); domain_t::const_iterator end = msg.domain().end(); for( ; asg != end; ++asg) { msg.set_logP( *asg, -1*(rand() % 100) ); } dt *= msg; st *= msg; //std::cout << "st *= msg " << st << std::endl; { dense_table_t st_as_dt(st.domain()); st.as_dense_table(st_as_dt); double err = dt.l1_diff(st_as_dt); ASSERT_LT(err, 1e-4); } } int main() { // create a table sparse_table_t st_gm = create_sparse_table(2, 0, 1); //std::cout << "st_gm " << st_gm << std::endl; dense_table_t dt(st_gm.domain()); st_gm.as_dense_table(dt); // equals test sparse_table_t st = create_sparse_table(2, 0, 1); //std::cout << "st " << st << std::endl; ASSERT_TRUE(st == st_gm); // copy test sparse_table_t st_copy; st_copy = st; ASSERT_TRUE(st == st_copy); // test sparse table data reorder testDataReorder(2, 3, 4); testDataReorder(2, 4, 3); testDataReorder(3, 2, 4); testDataReorder(3, 4, 2); testDataReorder(4, 2, 3); testDataReorder(4, 3, 2); // multiply test - compare a dense table to the 
sparse table. // (i've already compared a pre-ordered dense table to the // re-ordered ones.) multiplyTest(2, 0, 1, dt); std::cout << "All tests passed" << std::endl; } ================================================ FILE: toolkits/graphical_models/graphical_models.dox ================================================ /** \page graphical_models Graphical Models \brief The Graphical Models toolkit contains a collection of applications for reasoning about structured noisy data. Graphical models provide a compact interpretable representation of complex statistical phenomena by encoding random variables as vertices in a graph and relationships between those variables as edges. Given a graphical model representation, we can then apply Bayes rule to quantitatively infer properties of some variables given observations about others. Graphical models also provide the unique ability to quantify uncertainty in our prediction. \section distributed_dual_decomposition Distributed Dual Decomposition Dual Decomposition (DD), also called Lagrangian Relaxation, is a powerful technique with a rich history in Operations Research. DD solves a relaxation of difficult optimization problems by decomposing them into simpler subproblems, solving these simpler subproblems independently and then combining these solutions into an approximate global solution. More details about DD for solving Maximum A Posteriori (MAP) inference problems in Markov Random Fields (MRFs) can be found in the following: \verbatim D. Sontag, A. Globerson, T. Jaakkola. Introduction to Dual Decomposition for Inference. Optimization for Machine Learning, editors S. Sra, S. Nowozin, and S. J. Wright: MIT Press, 2011. \endverbatim Implemented by Andre' F. T. Martins and Dhruv Batra. \subsection running_ddd Running DDD The input MRF graph is assumed to be in the standard UAI file format. For example a 3x3 grid MRF can be found here: grid3x3.uai. 
The program can be run like this:

\verbatim
> ./dd --graph grid3x3.uai
\endverbatim

Other arguments are:

\li --help Display the help message describing the list of options.
\li --output The output directory in which to save the final predictions.
\li --dualimprovthres (Optional, default 0.00001) The amount of change in
dual objective (in log-space) that will be tolerated at convergence.
\li --pdgapthres (Optional, default 0.1) The tolerance level for zero
primal-dual gap.
\li --maxiter (Optional, default 10000) The maximum no. of dual update
iterations.
\li --engine (Optional, Default: asynchronous) The engine type to use when
executing the vertex-programs
  - synchronous: All LoopyBP updates are run at the same time (Synchronous
  BP). This engine exposes greater parallelism but is less computationally
  efficient.
  - asynchronous: LoopyBP updates are run asynchronously with priorities
  (Residual BP). This engine has greater overhead and exposes less
  parallelism but can substantially improve the rate of convergence.
\li --ncpus (Optional, Default 2) The number of local computation threads
to use on each machine. This should typically match the number of physical
cores.
\li --scheduler (Optional, Default sweep) The scheduler to use when running
with the asynchronous engine. The default is typically sufficient.
\li --engine_opts (Optional, Default empty) Any additional engine options.
See --engine_help for a list of options.
\li --graph_opts (Optional, Default empty) Any additional graph options.
See --graph_help for a list of options.
\li --scheduler_opts (Optional, Default empty) Any additional scheduler
options. See --scheduler_help for a list of options.
\section structured_prediction Structured Prediction

Currently the Graphical Models toolkit contains a discrete structured
prediction application which can be applied to a wide range of prediction
tasks where we have prior noisy predictions for a large number of variables
(e.g., political inclination of each user or article) and a graph encoding
similarity or dissimilarity relationships between those variables (e.g.,
friends share similar political inclinations). The structured prediction
application then infers the posterior distribution for each random variable
improving upon the prior prediction and providing a measure of uncertainty.

\subsection structured_prediction_example Structured Prediction Example

For example, suppose we had the recent posts for each user in a large
social network. Based on the frequency with which each user mentions a
conservative or liberal news item we might be able to construct a noisy
prior estimate of their political inclination. A user with no posts may
have a prior of 0.5 conservative and 0.5 liberal while another user that
frequently mentions a conservative pundit might have a prior of 0.8
conservative and 0.2 liberal.

If a user with no posts is friends with a user that frequently mentions
conservative news items, then it is more likely that the user with no posts
is also conservative. More generally we can leverage the social network to
improve our prediction for each user by examining not only their immediate
friends but also the community around each user. This is exactly what the
structured prediction application accomplishes. The output of the
structured prediction application is the posterior estimates for each user.

\subsection structured_prediction_model The Structured Prediction Model

The structured prediction application applies the Loopy Belief propagation
(LBP) algorithm to a pair-wise Markov Random Field encoding the classic
Potts Model.
The joint probability mass function is given by:

\image html potts_model.png

The edge weight \c w is obtained from the graph file but defaults to w=1 if
no edge weight is provided. The smoothing parameter \c SMOOTHING can be set
as a command line argument and controls the general smoothing.

\subsection loopy_bp_algorithm Loopy BP Algorithm

The structured prediction application uses a Loopy BP approximate inference
algorithm to estimate the posterior marginals. The Loopy BP algorithm
iteratively estimates a set of edge parameters commonly referred to as
"messages." The structured prediction application uses the asynchronous
residual variant of the Loopy BP algorithm.

\subsection structured_prediction_data Synthetic Data

To demonstrate the power of the structured prediction application we have
provided a synthetic dataset generator. To use the synthetic generator
simply build and run:

\verbatim
./synthetic_image_data
\endverbatim

This will create the synthetic noisy image:

\image html noisy_img.jpeg

as well as the true underlying image that we would like to recover:

\image html orig_img.jpeg

Each pixel in the image corresponds to a random variable whose unknown
value is the true pixel color. The goal is to use the neighborhood of each
pixel to improve our estimate and resolve the original image.

The \c synthetic_image_data application will also create the two input
files needed to run the structured prediction application. The first
contains synthetic prior estimates for each pixel. Each row begins with the
random variable id followed by the prior probability distribution for that
random variable. Notice that the prior assigns half of the mass to the
observed pixel value and the remaining mass to the other candidate pixel
values.
\verbatim > head synth_vdata.tsv 0 0.125 0.5 0.125 0.125 0.125 1 0.125 0.125 0.125 0.125 0.125 2 0.125 0.125 0.5 0.125 0.125 3 0.125 0.125 0.125 0.125 0.5 4 0.125 0.125 0.125 0.125 0.5 \endverbatim The second \c synth_edata.tsv file contains the graph structure with each line corresponding to an edge. Here we do not assign edge weights (and so the default weight of 1) will be used on all edges. Had we wanted to use weighted edges we would have added the weight value after each edge. \verbatim > head synth_edata.tsv 0 65536 0 1 1 65537 1 2 2 65538 2 3 \endverbatim We can now run the structured prediction application on the synthetic image. \verbatim > ./lbp_structured_prediction --prior synth_vdata.tsv --graph synth_edata.tsv \ --output posterior_vdata.tsv \endverbatim Once the application terminates the final predictions will be stored in the sequence of files \c posterior_vdata.tsv_X_of_X in exactly the same format as the prior \c synth_vdata.tsv. \verbatim > ls -l posterior_vdata.tsv_* posterior_vdata.tsv_1_of_2 posterior_vdata.tsv_2_of_2 \endverbatim in the format: \verbatim > head posterior_vdata.tsv_1_of_2 0 0.0237064 0.0947784 0.0245065 0.0323516 0.824657 1 0.00886895 0.0176509 0.0114683 0.0112453 0.950767 2 0.00402855 0.00489077 0.0161093 0.00426689 0.970705 3 0.00088747 0.00091284 0.00124409 0.000894688 0.996061 4 0.000696577 0.000695895 0.000706134 0.000695375 0.997206 5 0.000740404 0.000705437 0.000706437 0.000705451 0.997142 \endverbatim To visualize the predictions for the synthetic application we run: \verbatim > cat posterior_vdata.tsv_* | ./synthetic_image_data --pred pred_image.jpeg Create a synthetic noisy image. Reading in predictions nrows: 200 ncols: 200 minp: 0 maxp: 4 \endverbatim If we then open \c pred_image.jpeg we get: \image html pred_img.jpeg Not bad! \subsection structured_predictions_options Options \li --help Display the help message describing the list of options. \li --prior The prior vertex data file. 
\li --output The output directory/file_prefix in which to save the final predictions. \li --graph The graph describing the random variable dependency structure as well as optional weights. \li --smoothing (Optional, Default 2) The default smoothing parameter. Larger values imply stronger relationships between adjacent random variables in the graph. \li --damping (Optional, Default 0.1) The amount of damping to use. Damping can help ensure that the algorithm converges. Larger damping values lead to slower but more reliable convergence. \li --tol (Optional, default 0.01) The amount of change in parameter values (in log-space) that will be tolerated at convergence. \li --map (Optional, default false) If set to true the maximizing assignment will be returned in the output instead of the distribution. \li --engine (Optional, Default: asynchronous) The engine type to use when executing the vertex-programs - synchronous: All LoopyBP updates are run at the same time (Synchronous BP). This engine exposes greater parallelism but is less computationally efficient. - asynchronous: LoopyBP updates are run asynchronous with priorities (Residual BP). This engine is has greater overhead and exposes less parallelism but can substantially improve the rate over convergence. \li --ncpus (Optional, Default 2) The number of local computation threads to use on each machine. This should typically match the number of physical cores. \li --scheduler (Optional, Default sweep) The scheduler to use when running with the asynchronous engine. The default is typically sufficient. \li --engine_opts (Optional, Default empty) Any additional engine options. See --engine_help for a list of options. \li --graph_opts (Optional, Default empty) Any additional graph options. See --graph_help for a list of options. \li --scheduler_opts (Optional, Default empty) Any additional scheduler options. See --scheduler_help for a list of options. 
*/ // // P(x_1, \ldots, x_n) \propto // \exp \left( // - \sum_{(i,j) \in E} I[x_i \neq x_j ] w_{(i,j)} * \mathtt{SMOOTHING} // \right) \prod_{i \in V} \mathtt{prior}_i(x_i) // ================================================ FILE: toolkits/graphical_models/lbp_structured_prediction.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * * \brief This application used for structured prediction on a graph. * For example, suppose you want to model the interests of users in a * social network. * * Overview and Usage * ====================== * * For simplicity lets suppose you want to know the users interest in * the categories movies, sports, and music. After analyzing each * users profile you might be able to estimate a crude distribution * over her interests. However you would like to leverage similarity * among friends to improve your estimates. This application is * designed to do exactly that. * * As an input you provide two folders (or files) the first contains * the prior probabilities for each vertex in the form: * * \t \t ... \n * * For example: * * 1 0.2 0.2 0.6 * 2 0.3 0.6 0.1 * 3 0.3 0.3 0.4 * ... * * The second folder contains the graph structure in the form: * * \t \t [Optional Weight] * * For example: * * 1 2 * 1 3 1.7 * 3 2 0.3 * * The default weight value is 1 (times the smoothing parameter passed * in as a command line argument). 
Larger weight values imply * stronger relationships. A negative weight implies a "repulsive" * relationship in which neighboring vertices would like to have * different assignments. * * We have provided a synthetic data generator which creates a * synthetic dataset for an simulated image denoising task. See the * synthetic_image_data application for details. * * As output the application produces another set files with a format * identical to the vertex prior file with each weight (probability) * corresponding to the posterior predictions. * * Technical Explanation * ======================== * * This application creates a pair-wise Markov Random Field with * Ising-Potts edge factors and then uses residual loopy belief * propagation to compute posterior belief estimates for each vertex. * * * \author Joseph Gonzalez */ #include #include #include #include #include #include #include "eigen_serialization.hpp" #include #include /** * \brief Eigen library vectors are used to store factor in _LOG * SPACE_. */ typedef Eigen::VectorXd factor_type; /** * \brief The Ising smoothing parameter which controls the coupling * between adjacent predictions in the graph. Larger values imply * greater smoothing (stronger coupling). * * \code * edge_factor(xi, xj) = exp( (xi == xj)? 0 : -SMOOTHING * edge_weight ); * \endcode * * Not that the default edge weight is 1 however the graph file can * contain an additional edge weight column which allows per edge * control of the smoothing parameter. * * This parameter is set as a command line argument. */ double SMOOTHING = 2; /** * \brief The Damping parameter which helps ensure stable convergence. * Larger damping values lead to slower but more stable convergence. * * Currently damping is implemented in log-space in the following * equation: * * \code * log(new_message) = DAMPING * log(old_message) + * (1-DAMPING) * log(new_message); * \endcode * * This parameter is set as a command line argument. 
*/ double DAMPING = 0.1; /** * \brief The convergence threshold for each message. Smaller values * imply tighter convergence but slower execution. * * * The algorithm convergence when: * * \code * sum(abs(log(old_message) - log(new_message))) < TOLERANCE * \endcode * * The parameter is set as a command line argument */ double TOLERANCE = 0.01; /** * \brief The vertex data contains the vertex potential as well as the * current belief estimate and represents a random variable in the * Markov Random Field. * * The vertex potential represents the prior and is obtained from the * vertex prior file (stored in log form). * * The belief represents the current posterior estimate. */ struct vertex_data { factor_type belief; factor_type potential; void load(graphlab::iarchive& arc) { arc >> belief >> potential; } void save(graphlab::oarchive& arc) const { arc << belief << potential; } }; // end of vertex_data /** * \brief The edge data represents an edge in the Markov Random Field * and contains the loopy belief propagation message in both * directions along that edge as well as the old message in each * direction. In addition each edge contains the weight parameter * used to set edge specific smoothing (default value is 1). */ class edge_data { /** * \brief We store old and new messages in both directions as an * array of messages. The particular message index is then computed * using the \ref message_idx function. */ factor_type messages_[4]; /** * \brief The weight associated with the edge (used to scale the * smoothing parameter) */ double weight_; /** * \brief The function used to compute the message index in the edge * message array. 
*/ size_t message_idx(size_t source_id, size_t target_id, bool is_new) { return size_t(source_id < target_id) + 2 * size_t(is_new); } public: edge_data(const double w = 1) : weight_(w) { } const double& weight() const { return weight_; } /** * \brief Get the new message value from source_id to target_id */ factor_type& message(size_t source_id, size_t target_id) { return messages_[message_idx(source_id, target_id, true)]; } /** * \brief Get the old message value from source_id to target_id */ factor_type& old_message(size_t source_id, size_t target_id) { return messages_[message_idx(source_id, target_id, false)]; } /** * \brief Set the old message value equal to the new message value */ void update_old(size_t source_id, size_t target_id) { old_message(source_id, target_id) = message(source_id, target_id); } /** * \brief Initialize the edge data with source and target having the * appropriate number of states. * * \param source_id the vertex id of the source * \param nsource the number of states the source vertex takes * \param target_id the vertex id of the target * \param ntarget the number of states the target vertex takes */ void initialize(size_t source_id, size_t nsource, size_t target_id, size_t ntarget) { ASSERT_GT(nsource, 0); ASSERT_GT(ntarget, 0); message(source_id, target_id).setZero(ntarget); old_message(source_id, target_id).setZero(ntarget); message(target_id, source_id).setZero(nsource); old_message(target_id, source_id).setZero(nsource); } void save(graphlab::oarchive& arc) const { for(size_t i = 0; i < 4; ++i) arc << messages_[i]; arc << weight_; } void load(graphlab::iarchive& arc) { for(size_t i = 0; i < 4; ++i) arc >> messages_[i]; arc >> weight_; } }; // End of edge data /** * \brief The graph type used to store the Markov Random Field with * vertex data containing node potentials and beliefs and edge data * containing messages and weights. 
*/ typedef graphlab::distributed_graph graph_type; /** * \brief The Loopy Belief Propagation Vertex Program which computes * the product of the inbound messages during the gather phase, * updates the belief during the apply phase, and then computes the * new out-bound messages during the scatter phase. * * Since the gather phase is computing the product of the inbound * messages and the messages are stored in log form the resulting sum * operation is actually a vector sum and so the gather type is simply * the factor type and the operator+= operation for the factor type is * sufficient. * */ struct bp_vertex_program : public graphlab::ivertex_program< graph_type, factor_type, graphlab::messages::sum_priority >, public graphlab::IS_POD_TYPE { /** * \brief Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges /** * \brief Update the old message to be the new message and collect the * message value. */ factor_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_type other_vertex = get_other_vertex(edge, vertex); edge_data& edata = edge.data(); // Update the old message with the value of the new Message. We // then receive the old message during gather and then compute the // "cavity" during scatter (again using the old message). edata.update_old(other_vertex.id(), vertex.id()); const factor_type& recv_message = edata.old_message(other_vertex.id(), vertex.id()); // Ensure that the received message has the correct size ASSERT_EQ(recv_message.size(), vertex.data().potential.size()); return recv_message; }; // end of gather function /** * \brief Multiply message product by node potential and update the * belief. 
*/ void apply(icontext_type& context, vertex_type& vertex, const factor_type& total) { // If we have no neighbors than the belief is equal to the // potential so simply update the belief if(vertex.num_in_edges() + vertex.num_out_edges() == 0) { vertex.data().belief = vertex.data().potential; } else { vertex_data& vdata = vertex.data(); ASSERT_EQ(vdata.potential.size(), total.size()); // Multiply (add in log space) the potential to compute the belief vdata.belief = vdata.potential + total; ASSERT_GT(vdata.belief.size(), 0); // Rescale the belief to ensure numerical stability. (This is // essentially normalization in log-space.) vdata.belief.array() -= vdata.belief.maxCoeff(); } }; // end of apply /** * \brief Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of scatter edges /** * \brief Compute new message value for each edge. */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_type other_vertex = get_other_vertex(edge, vertex); edge_data& edata = edge.data(); // Divide (subtract in log space) out of the belief the old in // message to construct the cavity const factor_type& old_in_message = edata.old_message(other_vertex.id(), vertex.id()); ASSERT_EQ(old_in_message.size(), vertex.data().belief.size()); factor_type cavity = vertex.data().belief - old_in_message; // compute the new message by convolving with the Ising-Potts Edge // factor. factor_type& new_out_message = edata.message(vertex.id(), other_vertex.id()); const factor_type& old_out_message = edata.old_message(vertex.id(), other_vertex.id()); convolve(cavity, edata.weight(), new_out_message); // Renormalize (done in log space) new_out_message.array() -= new_out_message.maxCoeff(); // Apply damping to the message to stabilize convergence. 
new_out_message = DAMPING * old_out_message + (1-DAMPING) * new_out_message; // Compute message residual const double residual = (new_out_message - old_out_message).cwiseAbs().sum(); context.clear_gather_cache(other_vertex); // Schedule the adjacent vertex if(residual > TOLERANCE) context.signal(other_vertex, residual); }; // end of scatter private: /** * \brief Compute the convolution of the cavity with the Ising-Potts * edge potential and store the result in the message * * \param cavity the belief minus the in-bound message * \param weight the edge weight used to scale the smoothing parameter * \param [out] message The message in which to store the result of * the convolution. */ inline void convolve(const factor_type& cavity, const double& weight, factor_type& message) const { for(int i = 0; i < message.size(); ++i) { double sum = 0; for(int j = 0; j < cavity.size(); ++j) { sum += std::exp( cavity(j) + ( i == j? 0 : -(SMOOTHING*weight) ) ); } // To try and ensure numerical stability we do not allow // messages to underflow in log-space message(i) = (sum > 0)? std::log(sum) : std::numeric_limits::min(); } } // end of convolve /** * \brief Given an edge and a vertex return the other vertex along * that edge. */ inline vertex_type get_other_vertex(edge_type& edge, const vertex_type& vertex) const { return vertex.id() == edge.source().id()? edge.target() : edge.source(); }; // end of other_vertex }; // end of class bp_vertex_program /** * \brief The vertex load is used by the graph loading API to parse * the lines of prior data in the vertex data file. * * This parser uses the boost::spirit library to parse the vertex data * file. As a consequence it is fairly flexible allowing both comma * and tab delimited files as well as vertices with different numbers * of states. 
*/ bool vertex_loader(graph_type& graph, const std::string& fname, const std::string& line) { // If the line is empty simply skip it if(line.empty()) return true; // We use the boost spirit parser which requires (too) many separate // namespaces so to make things clear we shorten them here. namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; namespace phoenix = boost::phoenix; graphlab::vertex_id_type vid(-1); std::vector values; const bool success = qi::phrase_parse (line.begin(), line.end(), // Begin grammar ( qi::ulong_[phoenix::ref(vid) = qi::_1] >> -qi::char_(",") >> (qi::double_[phoenix::push_back(phoenix::ref(values), qi::_1)] % -qi::char_(",") ) ) , // End grammar ascii::space); // Test to see if the boost parser was able to parse the line if(!success) { logstream(LOG_ERROR) << "Parse error in vertex prior parser." << std::endl; return false; } // Ensure that a prior was provided. Technically this should not be // reached since the parser requires at least one prior entry if(values.empty()) { logstream(LOG_ERROR) << "Vertex has no prior." << std::endl; return false; } // Renormalize the vertex data. We require positive probabilities. double sum = 0; for(size_t i = 0; i < values.size(); ++i) { if(values[i] < 0) { logstream(LOG_ERROR) << "Encountered negative probability." << std::endl; return false; } if(values[i] == 0) { logstream(LOG_ERROR) << "Zero probability assignments are not currently supported." << std::endl; return false; } sum += values[i]; } ASSERT_GT(sum, 0); for(size_t i = 0; i < values.size(); ++i) values[i] /= sum; vertex_data vdata; vdata.potential.resize(values.size()); for(size_t i = 0; i < values.size(); ++i) { ASSERT_GT(values[i], 0); vdata.potential(i) = std::log(values[i]); } graph.add_vertex(vid, vdata); return true; } // end of vertex_loader; /** * \brief The edge data loader is used by the GraphLab graph loading * API to parse lines in the edge data file. 
*/ bool edge_loader(graph_type& graph, const std::string& fname, const std::string& line) { ASSERT_FALSE(line.empty()); namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; namespace phoenix = boost::phoenix; graphlab::vertex_id_type source(-1), target(-1); double weight = 1; const bool success = qi::phrase_parse (line.begin(), line.end(), // Begin grammar ( qi::ulong_[phoenix::ref(source) = qi::_1] >> -qi::char_(',') >> qi::ulong_[phoenix::ref(target) = qi::_1] >> -(-qi::char_(',') >> qi::double_[phoenix::ref(weight) = qi::_1]) ) , // End grammar ascii::space); if(!success) return false; graph.add_edge(source, target, edge_data(weight)); return true; } // end of edge loader /** * \brief The edge initializer is used to allocate the messages along * each edge based on the number of states of the source and target * vertex. */ void edge_initializer(graph_type::edge_type& edge) { edge_data& edata = edge.data(); const graphlab::vertex_id_type source_id = edge.source().id(); const size_t nsource = edge.source().data().potential.size(); const graphlab::vertex_id_type target_id = edge.target().id(); const size_t ntarget = edge.target().data().potential.size(); edata.initialize(source_id, nsource, target_id, ntarget); } // end of edge initializer /** * \brief The belief prediction saver is used to save the belief * predictions for each vertex. */ struct belief_prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { std::stringstream strm; strm << vertex.id() << '\t'; factor_type pred = vertex.data().belief; double sum = 0; for(int i = 0; i < pred.size(); ++i) sum += (pred(i) = std::exp(pred(i))); pred.array() /= sum; for(int i = 0; i < pred.size(); ++i) strm << pred(i) << (i+1 < pred.size()? 
'\t' : '\n'); return strm.str(); } std::string save_edge(const edge_type& edge) const { return ""; // nop } }; // end of belief_prediction_saver /** * \brief The MAP prediction saver is used to save the map estimated * for each vertex. The MAP estimate is the most likely assignment */ struct map_prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { std::stringstream strm; size_t prediction = 0; vertex.data().belief.maxCoeff(&prediction); strm << vertex.id() << '\t' << prediction << '\n'; return strm.str(); } std::string save_edge(const edge_type& edge) const { return ""; // nop } }; // end of map prediction_saver int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; // Parse command line options ----------------------------------------------- // \todo update description string const std::string description = "Structure prediction solver"; graphlab::command_line_options clopts(description); std::string prior_dir; std::string graph_dir; std::string output_dir = "pred"; std::string exec_type = "async"; bool map = false; clopts.attach_option("prior", prior_dir, "The directory containing the prior"); clopts.add_positional("prior"); clopts.attach_option("graph", graph_dir, "The directory containing the adjacency graph"); clopts.add_positional("graph"); clopts.attach_option("output", output_dir, "The directory in which to save the predictions"); clopts.add_positional("output"); clopts.attach_option("smoothing", SMOOTHING, "The amount of smoothing (larger = more)"); clopts.attach_option("damping", DAMPING, "The amount of damping (0 -> no damping and 1 -> no progress)"); clopts.attach_option("tol", TOLERANCE, "The tolerance level for convergence."); clopts.attach_option("map", map, "Return 
maximizing assignment instead of the posterior distribution."); clopts.attach_option("engine", exec_type, "The type of engine to use {async, sync}."); if(!clopts.parse(argc, argv)) { graphlab::mpi_tools::finalize(); return clopts.is_set("help")? EXIT_SUCCESS : EXIT_FAILURE; } if(prior_dir.empty()) { logstream(LOG_ERROR) << "No prior was provided." << std::endl; clopts.print_description(); return EXIT_FAILURE; } if(graph_dir.empty()) { logstream(LOG_ERROR) << "No graph was provided." << std::endl; clopts.print_description(); return EXIT_FAILURE; } // Start the webserver graphlab::launch_metric_server(); ///! load the graph graph_type graph(dc, clopts); ///! load the graph graph.load(prior_dir, vertex_loader); graph.load(graph_dir, edge_loader); graph.finalize(); graph.transform_edges(edge_initializer); typedef graphlab::omni_engine engine_type; engine_type engine(dc, graph, exec_type, clopts); engine.signal_all(); graphlab::timer timer; engine.start(); const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime << std::endl << "Updates executed: " << engine.num_updates() << std::endl << "Update Rate (updates/second): " << engine.num_updates() / runtime << std::endl; std::cout << "Saving predictions" << std::endl; const bool gzip_output = false; const bool save_vertices = true; const bool save_edges = false; const size_t threads_per_machine = 2; if(map) { graph.save(output_dir, map_prediction_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); } else { graph.save(output_dir, belief_prediction_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); } // graphlab::stop_metric_server_on_eof(); graphlab::stop_metric_server(); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/graphical_models/mplp_denoise.cpp 
================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * This file contains an example of graphlab used for MAP inference * in a discrete graphical model (pairwise MRF). The algorithm * implemented is the MPLP LP-Relaxation scheme of Globerson & Jaakkola. * * \author Dhruv Batra */ #include #include #include #include #include #include #include #include "eigen_serialization.hpp" #include typedef Eigen::VectorXd vector; typedef Eigen::MatrixXd matrix; template inline std::ostream& operator<<(std::ostream& os, std::vector& x) { typename std::vector::const_iterator i(x.begin()); while(i != x.end()) os << *i++ << ' '; return os; } // Global variables size_t NCOLORS; double SIGMA; double BOUND; // LP-based upper-bound on MAP graphlab::mutex mutex; //mutex.lock(); double LPval = 0; double MAPval = 0; double MAPrepval = 0; //mutex.unlock(); // Shared base edge potential matrix THETA_ij; // keep track of predictions at each node vector PRED_COLOR; // check if all nodes are visited //Eigen::Matrix, Eigen::Dynamic,1> vinit; //Eigen::Matrix, Eigen::Dynamic,1> vapply; //vector vinit; //vector vapply; //std::vector > vinit; //std::vector > vapply; std::vector vinit; std::vector vapply; // STRUCTS (Edge and Vertex data) =============================================> /** * Each GraphLab vertex is a (pairwise) factor from the MRF */ struct vertex_data { /** variable ids 
*/ int i, j; // degree of these nodes in the MRF int deg_i, deg_j; /** observed color for each variable */ float obs_color_i, obs_color_j; /** predicted color for each variable */ float pred_color_i, pred_color_j; // current maximizers of reparameterized theta_i, theta_j and theta_IJ int maxI, maxJ, maxIJ_i, maxIJ_j; // current contribution to LP dual value double vali, valj, valij; // current contribution to MAP value double pvali, pvalj, pvalij; // current contribution to MAPrep value double prvali, prvalj, prvalij; // since variables i and j are present in multiple factors, this determines who owns them bool iowner, jowner; /** dual variables being optimized (or messages) */ vector delf_i, delf_j; // constructor vertex_data(): i(-1), j(-1), deg_i(0), deg_j(0), obs_color_i(-1), obs_color_j(-1), pred_color_i(0), pred_color_j(0), vali(0), valj(0), valij(0), pvali(0), pvalj(0), pvalij(0), prvali(0), prvalj(0), prvalij(0), iowner(false), jowner(false) { } void save(graphlab::oarchive& arc) const { arc << i << j << deg_i << deg_j << obs_color_i << obs_color_j << pred_color_i << pred_color_j << maxI << maxJ << maxIJ_i << maxIJ_j << vali << valj << valij << pvali << pvalj << pvalij << prvali << prvalj << prvalij << iowner << jowner << delf_i << delf_j; } void load(graphlab::iarchive& arc) { arc >> i >> j >> deg_i >> deg_j >> obs_color_i >> obs_color_j >> pred_color_i >> pred_color_j >> maxI >> maxJ >> maxIJ_i >> maxIJ_j >> vali >> valj >> valij >> pvali >> pvalj >> pvalij >> prvali >> prvalj >> prvalij >> iowner >> jowner >> delf_i >> delf_j; } }; // End of vertex data // /** // * The data associated with a pair of factors in a pairwise MRF // */ //struct edge_data : public graphlab::IS_POD_TYPE //{ // // primal labelling; We assume pairwise factors, so intersection has // // a single node // int pred_color; // // // current contribution to LP dual value // double dval; // // current contribution to MAP value // double pval; // // current contribution to MAPrep value // 
double prval; // // edge_data(): // pred_color(0), // dval(0), pval(0), prval(0) // {} // // void save(graphlab::oarchive& arc) const // { // arc << pred_color // << dval << pval << prval; // } // void load(graphlab::iarchive& arc) // { // arc >> pred_color // >> dval >> pval >> prval; // } //}; // End of edge data typedef graphlab::empty edge_data; /** * The graph type */ typedef graphlab::distributed_graph graph_type; // GraphLab Vertex Program ==================================================== /** * The type passed around during the gather phase */ struct gather_type { vector delf_i, delf_j; gather_type& operator+=(const gather_type& other) { if(!other.delf_i.size() == 0) { if(delf_i.size() == 0) delf_i = other.delf_i; else delf_i += other.delf_i; } if(!other.delf_j.size() == 0) { if(delf_j.size() == 0) delf_j = other.delf_j; else delf_j += other.delf_j; } return *this; } // end of operator += void save(graphlab::oarchive& arc) const { arc << delf_i << delf_j; } void load(graphlab::iarchive& arc) { arc >> delf_i >> delf_j; } }; // end of gather type /** * The core belief propagation update function. This update satisfies * the graphlab update_function interface. 
*/ class mplp_vertex_program : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { private: double priority; public: mplp_vertex_program() : priority(0) { } // void save(graphlab::oarchive& arc) const { /** save members */ } // void load(graphlab::iarchive& arc) { /** load members */ } /** * This function is now called in the main by invoking: * engine.transform_vertices(mplp_vertex_program::init) */ static void init_vertex_data(icontext_type& context, vertex_type& vertex) { vertex_data& vdata = vertex.data(); // Create zero messages vdata.delf_i = vector::Zero(NCOLORS); vdata.delf_j = vector::Zero(NCOLORS); // create temporary node potentials vector theta_i = make_unary_potential(vertex, 'i'); vector theta_j = make_unary_potential(vertex, 'j'); // if we own i if (vdata.iowner) { // get dual contribution vdata.vali = theta_i.maxCoeff(&vdata.maxI); // get primal contribution vdata.pred_color_i = vdata.maxI; vdata.pvali = vdata.vali; // also update the global copy PRED_COLOR[vdata.i] = vdata.pred_color_i; // get rep primal contribution vdata.prvali = vdata.pvali; } else // if we don't own then just copy over the global predicted color vdata.pred_color_i = PRED_COLOR[vdata.i]; // if we own j if (vdata.jowner) { // get dual contribution vdata.valj = theta_j.maxCoeff(&vdata.maxJ); // get primal contribution vdata.pred_color_j = vdata.maxJ; vdata.pvalj = vdata.valj; // also update the global copy PRED_COLOR[vdata.j] = vdata.pred_color_j; // get rep primal contribution vdata.prvalj = vdata.pvalj; } else vdata.pred_color_j = PRED_COLOR[vdata.j]; // we always own edge i,j vdata.valij = THETA_ij.maxCoeff(&vdata.maxIJ_i,&vdata.maxIJ_j); vdata.pvalij = THETA_ij(vdata.pred_color_i, vdata.pred_color_j); vdata.prvalij = vdata.pvalij; mutex.lock(); LPval += vdata.vali; LPval += vdata.valj; LPval += vdata.valij; MAPval += vdata.pvali; MAPval += vdata.pvalj; MAPval += vdata.pvalij; MAPrepval += vdata.prvali; MAPrepval += vdata.prvalj; MAPrepval += vdata.prvalij; 
mutex.unlock(); // debug code to check in all nodes are inited if (vinit[vertex.id()] == 0) vinit[vertex.id()] = 1; } /** * Recv message is called by the engine to receive a message to this * vertex program. The vertex program can use this to initialize * any state before entering the gather phase. If the vertex * program does not implement this function then the default * implementation (NOP) is used. */ // void init(icontext_type& context, const vertex_type& vertex, // const message_type& msg) { /** NOP */ } /** * Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges // Run the gather operation over all in edges gather_type gather(icontext_type& context, const vertex_type& target_vertex, edge_type& edge) const { const vertex_type source_vertex = get_other_vertex(edge, target_vertex); const vertex_data& source_vdata = source_vertex.data(); const vertex_data& target_vdata = target_vertex.data(); // Accumulate message gather_type ret_value; if (target_vdata.i == source_vdata.i) ret_value.delf_i = source_vdata.delf_i; else if (target_vdata.j == source_vdata.i) ret_value.delf_j = source_vdata.delf_i; else if (target_vdata.i == source_vdata.j) ret_value.delf_i = source_vdata.delf_j; else if (target_vdata.j == source_vdata.j) ret_value.delf_j = source_vdata.delf_j; else assert(false); // invalid state return ret_value; } // end of gather /** Update the dual parameters */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& sum) { // Make sure this vertex has neighbours. 
Everyone should have neighbours ASSERT_GT(vertex.num_in_edges() + vertex.num_out_edges(), 0); vertex_data& vdata = vertex.data(); vector theta_i = make_unary_potential(vertex, 'i'); vector theta_j = make_unary_potential(vertex, 'j'); ASSERT_EQ(THETA_ij.rows(), theta_i.size()); ASSERT_EQ(THETA_ij.rows(), sum.delf_i.size()); ASSERT_EQ(THETA_ij.cols(), theta_j.size()); ASSERT_EQ(THETA_ij.cols(), sum.delf_j.size()); // debug code to check in all nodes are applied if (vapply[vertex.id()] == 0) vapply[vertex.id()] = 1; //////////////////////////////////////////// // Update outgoing messages (coordinate descent) // Backup the old prediction const vector old_delf_i = vdata.delf_i; const vector old_delf_j = vdata.delf_j; // Update del fi vdata.delf_i = -(theta_i + sum.delf_i)/2 + (THETA_ij + (theta_j + sum.delf_j).transpose().replicate(THETA_ij.rows(),1)). rowwise().maxCoeff()/2; // Update del fj vdata.delf_j = -(theta_j + sum.delf_j)/2 + ((THETA_ij + (theta_i + sum.delf_i).replicate(1,THETA_ij.cols())). colwise().maxCoeff()).transpose()/2; //////////////////////////////////////////// // Compute contributions to dual, primal and rep primal // Remove contribution of old labels from LPval double LPremove=0, MAPremove=0, MAPrepremove=0; LPremove += vdata.vali; LPremove += vdata.valj; LPremove += vdata.valij; MAPremove += vdata.pvali; MAPremove += vdata.pvalj; MAPremove += vdata.pvalij; MAPrepremove += vdata.prvali; MAPrepremove += vdata.prvalj; MAPrepremove += vdata.prvalij; // Update dual, primal and rep primal contributions. 
// TODO: if primal labelling changes at a node we own, update it's edge potential too if (vdata.iowner) { // reparameterized node potential vector thetarep_i = theta_i + sum.delf_i + vdata.delf_i; vdata.vali = thetarep_i.maxCoeff(&vdata.maxI); vdata.pred_color_i = vdata.maxI; vdata.pvali = theta_i[vdata.pred_color_i]; PRED_COLOR[vdata.i] = vdata.pred_color_i; vdata.prvali = thetarep_i[vdata.pred_color_i]; } else vdata.pred_color_i = PRED_COLOR[vdata.i]; if (vdata.jowner) { // reparameterized node potential vector thetarep_j = theta_j + sum.delf_j + vdata.delf_j; vdata.valj = thetarep_j.maxCoeff(&vdata.maxJ); vdata.pred_color_j = vdata.maxJ; vdata.pvalj = theta_j[vdata.pred_color_j]; PRED_COLOR[vdata.j] = vdata.pred_color_j; vdata.prvalj = thetarep_j[vdata.pred_color_j]; } else vdata.pred_color_j = PRED_COLOR[vdata.j]; // We always own edge i,j matrix thetarep_ij = THETA_ij - (vdata.delf_i.replicate(1,THETA_ij.cols())) - (vdata.delf_j.transpose().replicate(THETA_ij.rows(),1)); vdata.valij = thetarep_ij.maxCoeff(&vdata.maxIJ_i, &vdata.maxIJ_j); vdata.pvalij = THETA_ij(vdata.pred_color_i, vdata.pred_color_j); vdata.prvalij = thetarep_ij(vdata.pred_color_i, vdata.pred_color_j); mutex.lock(); LPval -= LPremove; MAPval -= MAPremove; MAPrepval -= MAPrepremove; LPval += vdata.vali; LPval += vdata.valj; LPval += vdata.valij; MAPval += vdata.pvali; MAPval += vdata.pvalj; MAPval += vdata.pvalij; MAPrepval += vdata.prvali; MAPrepval += vdata.prvalj; MAPrepval += vdata.prvalij; mutex.unlock(); //////////////////////////////////////////// // Debugging printing and residuals //std::cout << vertex.id() << ": " << vdata.i << "," << vdata.j << "\n"; if (0) // (vdata.i == 0 ) { mutex.lock(); std::cout << "Applying at vertex: " << vertex.id() << "(" << vdata.i << "," << vdata.j << ")\n"; std::cout << LPval << "," << MAPval << "," << MAPrepval << "\t" ; int vinitsum = 0, vapplysum = 0; //std::vector >::iterator it; std::vector::iterator it; for (it = vinit.begin(); it < vinit.end(); 
++it) vinitsum += *it; for (it = vapply.begin(); it < vapply.end(); ++it) vapplysum += *it; // if all vertices have been visited start counting again if (vapply.size() == vapplysum) for (int i=0; i!=vapply.size(); ++i) vapply[i] = 0; std::cout << "Verted Id: " << vertex.id() << " " << vdata.i << "," << vdata.j << " "; std::cout << "Inited: " << vinitsum << " Applied: " << vapplysum << "\n"; if (vinit.size() == vinitsum) std::cout << "Restarting counting of apply\n"; std::cout.flush(); mutex.unlock(); } if (0)//vdata.i == 1) { std::cout << "\n\n"; std::cout << "Pairwise Potential\n" << THETA_ij << "\n\n"; std::cout << "theta_ij reparameterized: \n" << (THETA_ij - vdata.delf_i.replicate(1,THETA_ij.cols()) - vdata.delf_j.transpose().replicate(THETA_ij.rows(),1) ) << "\n\n"; std::cout << "maxIJ_i: " << vdata.maxIJ_i << " maxIJ_j: " << vdata.maxIJ_j << "\n\n"; std::cout << "thetai \n" << theta_i << "\n\n"; std::cout << "sum of incomming messages into i\n" << sum.delf_i << "\n\n"; std::cout << "outgoing message to i\n" << vdata.delf_i << "\n\n"; std::cout << " Reparamterized thetai\n" << (theta_i + sum.delf_i + vdata.delf_i) << "\n\n"; std::cout << "maxI: " << vdata.maxI << "\n\n"; std::cout << "thetaj \n" << theta_j << "\n\n"; std::cout << "sum of incomming messages into j\n" << sum.delf_j << "\n\n"; std::cout << "outgoing message to j\n" << vdata.delf_j << "\n\n"; std::cout << " Reparamterized thetaj\n" << (theta_j + sum.delf_j + vdata.delf_j) << "\n\n"; std::cout << "maxJ: " << vdata.maxJ << "\n\n"; std::cout << "thetaij + j message\n" << (THETA_ij + sum.delf_j.transpose().replicate(THETA_ij.rows(),1))/2 << "\n\n"; std::cout << (THETA_ij + sum.delf_j.transpose().replicate(THETA_ij.rows(),1)). rowwise().maxCoeff()/2 << std::endl << std::endl; std::cout << "thetaij + i message\n" << (THETA_ij + sum.delf_i.replicate(1,THETA_ij.cols()))/2 << "\n\n"; std::cout << ((THETA_ij + sum.delf_i.replicate(1,THETA_ij.cols())). 
colwise().maxCoeff()).transpose()/2 << "\n\n"; std::cout << "Old del_fi\n" << vdata.delf_i << "\n\n"; std::cout << "Old del_fj\n" << vdata.delf_j << "\n\n"; std::cout << "New del_fi\n" << -(theta_i + sum.delf_i)/2 + (THETA_ij + (theta_j + sum.delf_j).transpose().replicate(THETA_ij.rows(),1)). rowwise().maxCoeff()/2 << "\n\n"; std::cout << "New del_fj\n" << -(theta_j + sum.delf_j)/2 + ((THETA_ij + (theta_i + sum.delf_i).replicate(1,THETA_ij.cols())). colwise().maxCoeff()).transpose()/2 << "\n\n"; getchar(); } // const double residual = (vdata.delf_i - old_delf_i).cwiseAbs().sum() + // (vdata.delf_j - old_delf_j).cwiseAbs().sum(); //priority = residual; priority = LPval - MAPval; //std::cout << "priority: " << priority << std::endl; //std::cout << LPval << std::endl; //test code; for now, only run 1 iteration //priority = 0; } // end of apply /** * Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { //return priority < BOUND? graphlab::NO_EDGES : graphlab::ALL_EDGES; return graphlab::ALL_EDGES; }; // end of gather_edges /** reschedule neighbors with a given priority and updated predictions on each edge*/ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { context.signal(get_other_vertex(edge, vertex), priority); } // end of scatter private: /** * Construct the unary evidence potential */ static vector make_unary_potential(const vertex_type& vertex, const char varid) { vector potential(NCOLORS); const double obs = varid == 'i'? 
vertex.data().obs_color_i : vertex.data().obs_color_j; const double sigmaSq = SIGMA*SIGMA; for(int pred = 0; pred < potential.size(); ++pred) { potential(pred) = -(obs - pred)*(obs - pred) / (2.0 * sigmaSq); } //potential /= std::abs(potential.sum()); //float tmp = potential.minCoeff(); //potential.array() -= tmp; // (float) potential.minCoeff(); return potential; } // end of make_potentail /** * Return the other vertex */ vertex_type get_other_vertex(edge_type& edge, const vertex_type& vertex) const { return vertex.id() == edge.source().id()? edge.target() : edge.source(); } // end of other_vertex }; // end of MPLP vertex program /** * Define the engine type */ //typedef graphlab::synchronous_engine engine_type; typedef graphlab::async_consistent_engine engine_type; //typedef graphlab::asynchronous_consistent_engine engine_type; ///////////////////////////////////////////////////////////////////////////////////// // Aggregator functions to compute primal & dual values double get_energy_fun(mplp_vertex_program::icontext_type& context, const mplp_vertex_program::vertex_type& vertex) { double tmp = 0; const vertex_data &vdata = vertex.data(); if (vdata.iowner) tmp += vdata.vali; if (vdata.jowner) tmp += vdata.valj; tmp += vdata.valij; return tmp; } void finalize_fun(mplp_vertex_program::icontext_type& context, double total) { if(context.procid() == 0) std::cout << "Dual value: " << total << std::endl; } // Helper functions ===========================================================> graphlab::vertex_id_type pixel_ind(size_t rows, size_t cols, size_t r, size_t c) { return r * cols + c; }; // end of pixel_ind graphlab::vertex_id_type factor_ind(size_t rows, size_t cols, size_t i, size_t j) { if(i > j) std::swap(i,j); return i * (rows * cols) + j; }; // end of factor_ind graphlab::vertex_id_type factor_ind2(size_t rows, size_t cols, size_t i, size_t j) { if(i > j) std::swap(i,j); if (j == (i+1)) // horizontal edge return i - std::floor(i/cols); else if (j == (i+cols)) 
return rows*(cols-1) + i; else std::cout << "Problem "; //ASSERT_TRUE(false); return 0; }; // end of factor_ind void create_synthetic_cluster_graph(graphlab::distributed_control& dc, graph_type& graph, const size_t rows, const size_t cols) { dc.barrier(); // Generate the image on all machines ---------------------------------------> // Need to ensure that all machines generate the same noisy image graphlab::random::generator gen; gen.seed(314); std::vector obs_pixels(rows * cols); std::vector true_pixels(rows * cols); const double center_r = rows / 2.0; const double center_c = cols / 2.0; const double max_radius = std::min(rows, cols) / 2.0; for(size_t r = 0; r < rows; ++r) { for(size_t c = 0; c < cols; ++c) { // Compute the true pixel value const double distance = sqrt((r-center_r)*(r-center_r) + (c-center_c)*(c-center_c)); // Compute ring of sunset const uint16_t ring_color = std::floor(std::min(1.0, distance/max_radius) * (NCOLORS - 1) ); // Compute the true pixel color by masking with the horizon const uint16_t true_color = r < rows/2 ? 
ring_color : 0; // compute the predicted color const float obs_color = true_color + gen.normal(0, SIGMA); // determine the true pixel id const size_t pixel = pixel_ind(rows,cols,r,c); true_pixels[pixel] = true_color; obs_pixels[pixel] = obs_color; } // end of loop over cols } // end of loop over rows if(dc.procid() == 0) { //int nedges = 2*rows*cols -rows-cols; int nedges = factor_ind2(rows,cols,rows*cols-cols,rows*cols); //vinit = vector::Zero(2*rows*cols -rows-cols); //vapply = vector::Zero(2*rows*cols -rows-cols); vinit.clear(); vinit.resize(nedges, 0); vapply.clear(); vapply.resize(nedges,0); int max_vid = 0; PRED_COLOR = vector::Zero(rows*cols); int ownercount = 0; // temp code std::ofstream ne, ee; ne.open("./node_en.txt"); //ee.open("./edge_en.txt"); ne << rows*cols << " " << NCOLORS << " " << 0 << " " << rows << " " << cols << std::endl; // end temp std::vector nbrs; // load the graph for(size_t r = 0; r < rows; ++r) { for(size_t c = 0; c < cols; ++c) { // temp code to write out potential to file: std::vector potential(NCOLORS); const double obs = obs_pixels[pixel_ind(rows,cols,r,c)]; const double sigmaSq = SIGMA*SIGMA; double sum = 0; for(int pred = 0; pred < potential.size(); ++pred) { potential[pred] = +(obs - pred)*(obs - pred) / (2.0 * sigmaSq); sum += potential[pred]; } // for(int pred = 0; pred < potential.size(); ++pred) // potential[pred] /= sum; // ne << true_pixels[pixel_ind(rows,cols,r,c)] << " " << obs << " " << potential << std::endl; // ne << 4 - int((r==0)||(r==(rows-1))) - int((c==0)||(c==(cols-1))) << " " << potential << std::endl; ne << potential << std::endl; // end temp // Add the two vertices (factors to the right and below this // pixel) if(r + 1 < rows) { vertex_data vdata; vdata.i = pixel_ind(rows,cols,r,c); vdata.j = pixel_ind(rows,cols,r+1,c); vdata.deg_i = 4 - int((r==0)||(r==(rows-1))) - int((c==0)||(c==(cols-1))); vdata.deg_j = 4 - int(((r+1)==0)||((r+1)==(rows-1))) - int((c==0)||(c==(cols-1))); vdata.obs_color_i = 
obs_pixels[vdata.i]; vdata.obs_color_j = obs_pixels[vdata.j]; graph.add_vertex(factor_ind2(rows,cols,vdata.i,vdata.j), vdata); // temp code max_vid = std::max((int)factor_ind2(rows,cols,vdata.i,vdata.j), max_vid); } if(c + 1 < cols) { vertex_data vdata; vdata.i = pixel_ind(rows,cols,r,c); vdata.j = pixel_ind(rows,cols,r,c+1); vdata.deg_i = 4 - int((r==0)||(r==(rows-1))) - int((c==0)||(c==(cols-1))); vdata.deg_j = 4 - int((r==0)||(r==(rows-1))) - int(((c+1)==0)||((c+1)==(cols-1))); vdata.obs_color_i = obs_pixels[vdata.i]; vdata.obs_color_j = obs_pixels[vdata.j]; vdata.iowner = true; // give i-ownership to horizontal edges ++ownercount; if ((c+1)==(cols-1)) // and j-ownership too if last node in this row { vdata.jowner = true; ++ownercount; } graph.add_vertex(factor_ind2(rows,cols,vdata.i,vdata.j), vdata); // temp code max_vid = std::max((int)factor_ind2(rows,cols,vdata.i,vdata.j), max_vid); } // Compute all the factors that contain this pixel nbrs.clear(); if(r+1 < rows) nbrs.push_back(factor_ind2(rows,cols, pixel_ind(rows,cols,r,c), pixel_ind(rows,cols,r+1,c))); if(r-1 < rows) //if(r-1 >= 0) nbrs.push_back(factor_ind2(rows,cols, pixel_ind(rows,cols,r-1,c), pixel_ind(rows,cols,r,c))); if(c+1 < cols) nbrs.push_back(factor_ind2(rows,cols, pixel_ind(rows,cols,r,c), pixel_ind(rows,cols,r,c+1))); if(c-1 < cols) //if(c-1 >= 0) nbrs.push_back(factor_ind2(rows,cols, pixel_ind(rows,cols,r,c-1), pixel_ind(rows,cols,r,c))); // construct the clique over the factors for(size_t i = 0; i < nbrs.size(); ++i) { for(size_t j = i+1; j < nbrs.size(); ++j) { graph.add_edge(nbrs[i], nbrs[j]); } } } // end of for cols } // end of for rows // temp code ne.close(); //ee.close(); std::cout << "Max vid fed into graphlab: " << max_vid << "\n"; std::cout << "No. 
of owners: " << ownercount << "\n"; } // end of if proc 0 dc.barrier(); } // end of create synthetic cluster graph void initialize_theta_ij(const std::string& smoothing, const double lambda) { THETA_ij.resize(NCOLORS, NCOLORS); // Set the smoothing type if(smoothing == "laplace") { for(int i = 0; i < THETA_ij.rows(); ++i) for(int j = 0; j < THETA_ij.cols(); ++j) THETA_ij(i,j) = -std::abs(double(i) - double(j)) * lambda; } else { for(int i = 0; i < THETA_ij.rows(); ++i) for(int j = 0; j < THETA_ij.cols(); ++j) THETA_ij(i,j) = -(i == j? 0 : lambda); } } // end of initialize_theta_ij template struct merge_reduce { std::set values; void save(graphlab::oarchive& arc) const { arc << values; } void load(graphlab::iarchive& arc) { arc >> values; } merge_reduce& operator+=(const merge_reduce& other) { values.insert(other.values.begin(), other.values.end()); return *this; } }; // end of merge_reduce typedef std::pair pred_pair_type; typedef merge_reduce merge_reduce_type; merge_reduce_type pred_map_function(graph_type::vertex_type vertex) { merge_reduce ret; ret.values.insert(pred_pair_type(vertex.data().i, vertex.data().pred_color_i)); ret.values.insert(pred_pair_type(vertex.data().j, vertex.data().pred_color_j)); return ret; } // end of pred_map_function merge_reduce_type obs_map_function(graph_type::vertex_type vertex) { merge_reduce ret; ret.values.insert(pred_pair_type(vertex.data().i, vertex.data().obs_color_i)); ret.values.insert(pred_pair_type(vertex.data().j, vertex.data().obs_color_j)); return ret; } // end of obs_map_function std::pair ind2sub(size_t rows, size_t cols, size_t ind) { return std::make_pair(ind / cols, ind % cols); }; // end of sub2ind // /** // * Saving an image as a pgm file. 
// */ // void save_image(const size_t rows, const size_t cols, // const std::set& values, // const std::string& fname) { // std::cout << "NPixels: " << values.size() << std::endl; // image img(rows, cols); // foreach(pred_pair_type pair, values) // img.pixel(pair.first) = pair.second; // img.save(fname); // } // end of save_image /** * Saving an image as a pgm file. */ void save_image(const size_t rows, const size_t cols, const std::set& values, const std::string& fname) { std::cout << "NPixels: " << values.size() << std::endl; // determine the max and min colors float max_color = -std::numeric_limits::max(); float min_color = std::numeric_limits::max(); foreach(pred_pair_type pair, values) { max_color = std::max(max_color, pair.second); min_color = std::min(min_color, pair.second); } cv::Mat img(cols, rows, CV_8UC1); foreach(pred_pair_type pair, values) { std::pair coords = ind2sub(rows,cols, pair.first); float value = (pair.second - min_color) / (max_color - min_color); int color = 255 * value > 255 ? 255 : 255 * value; img.at(coords.first, coords.second) = color; } cv::imwrite(fname, img); } // MAIN =======================================================================> int main(int argc, char** argv) { std::cout << "This program creates and denoises a synthetic " << std::endl << "image using loopy belief propagation inside " << std::endl << "the graphlab framework." 
<< std::endl; // // set the global logger // global_logger().set_log_level(LOG_WARNING); // global_logger().set_log_to_console(true); // Set initial values for members -------------------------------------------> NCOLORS = 5; SIGMA = 2; BOUND = 1E-4; // size_t nrows = 200; // size_t ncols = 200; size_t nrows = 20; size_t ncols = 20; double lambda = 0.2; std::string smoothing = "square"; std::string orig_fn = "source_img.jpeg"; std::string noisy_fn = "noisy_img.jpeg"; std::string pred_fn = "pred_img.jpeg"; // std::string orig_fn = "source_img.pgm"; // std::string noisy_fn = "noisy_img.pgm"; // std::string pred_fn = "pred_img.pgm"; // Parse command line arguments ---------------------------------------------> graphlab::command_line_options clopts("Loopy BP image denoising"); clopts.attach_option("bound", BOUND, "Residual termination bound"); clopts.attach_option("ncolors", NCOLORS, "The number of colors in the noisy image"); clopts.attach_option("sigma", SIGMA, "Standard deviation of noise."); clopts.attach_option("nrows", nrows, "The number of rows in the noisy image"); clopts.attach_option("ncols", ncols, "The number of columns in the noisy image"); clopts.attach_option("lambda", lambda, "Smoothness parameter (larger => smoother)."); clopts.attach_option("smoothing", smoothing, "Options are {square, laplace}"); clopts.attach_option("orig", orig_fn, "Original image file name."); clopts.attach_option("noisy", noisy_fn, "Noisy image file name."); clopts.attach_option("pred", pred_fn, "Predicted image file name."); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); const bool success = clopts.parse(argc, argv); if(!success) { clopts.print_description(); graphlab::mpi_tools::finalize(); return EXIT_FAILURE; } ///! Create a distributed control object graphlab::distributed_control dc; ///! 
display settings if(dc.procid() == 0) { std::cout << "ncpus: " << clopts.get_ncpus() << std::endl << "bound: " << BOUND << std::endl << "colors: " << NCOLORS << std::endl << "nrows: " << nrows << std::endl << "ncols: " << ncols << std::endl << "sigma: " << SIGMA << std::endl << "lambda: " << lambda << std::endl << "smoothing: " << smoothing << std::endl << "scheduler: " << clopts.get_scheduler_type() << std::endl << "orig_fn: " << orig_fn << std::endl << "noisy_fn: " << noisy_fn << std::endl << "pred_fn: " << pred_fn << std::endl; } // Create synthetic images --------------------------------------------------> std::cout << "Creating a synthetic noisy image." << std::endl; graph_type graph(dc, clopts); create_synthetic_cluster_graph(dc, graph, nrows, ncols); std::cout << "Finalizing the graph." << std::endl; graph.finalize(); std::cout << "Collect the noisy image. " << std::endl; merge_reduce_type obs_image = graph.map_reduce_vertices(obs_map_function); std::cout << "saving the noisy image." << std::endl; if(dc.procid() == 0) { save_image(nrows, ncols, obs_image.values, noisy_fn); } // Initialze the edge factor -----------------------------------------------> std::cout << "Initializing shared edge factor. " << std::endl; // dummy variables 0 and 1 and num_rings by num_rings initialize_theta_ij(smoothing, lambda); if(dc.procid() == 0) std::cout << THETA_ij << std::endl; // Create the engine --------------------------------------------------------> std::cout << "Creating the engine. 
" << std::endl; engine_type engine(dc, graph, clopts); engine.add_vertex_aggregator("energy", get_energy_fun, finalize_fun); engine.aggregate_periodic("energy", 3); // run every 3 seconds engine.transform_vertices(mplp_vertex_program::init_vertex_data); std::cout << "Scheduling all vertices" << std::endl; engine.signal_all(); std::cout << "Starting the engine" << std::endl; engine.start(); const float runtime = engine.elapsed_seconds(); size_t update_count = engine.num_updates(); std::cout << "Finished Running engine in " << runtime << " seconds." << std::endl << "Total updates: " << update_count << std::endl << "Efficiency: " << (double(update_count) / runtime) << " updates per second " << std::endl; // Saving the output --------------------------------------------------------> std::cout << "Saving the predicted image" << std::endl; std::cout << "Collect the noisy image. " << std::endl; merge_reduce_type pred_image = graph.map_reduce_vertices(pred_map_function); std::cout << "saving the pred image." << std::endl; if(dc.procid() == 0) { save_image(nrows, ncols, pred_image.values, pred_fn); } std::cout << "Done!" << std::endl; graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // End of main ================================================ FILE: toolkits/graphical_models/mplp_structured_prediction.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
 *
 *
 */


/**
 *
 * \brief This application used for structured prediction on a graph.
 * For example, suppose you want to model the interests of users in a
 * social network.
 *
 * Overview and Usage
 * ======================
 *
 * For simplicity lets suppose you want to know the users interest in
 * the categories movies, sports, and music. After analyzing each
 * users profile you might be able to estimate a crude distribution
 * over her interests. However you would like to leverage similarity
 * among friends to improve your estimates. This application is
 * designed to do exactly that.
 *
 * As an input you provide two folders (or files) the first contains
 * the prior probabilities for each vertex in the form
 * (placeholders restored; the extraction stripped angle brackets):
 *
 *   <vertex id> \t <prior 1> \t <prior 2> ... \n
 *
 * For example:
 *
 * 1 0.2 0.2 0.6
 * 2 0.3 0.6 0.1
 * 3 0.3 0.3 0.4
 * ...
 *
 * The second folder contains the graph structure in the form:
 *
 *   <source id> \t <target id> \t [Optional Weight]
 *
 * For example:
 *
 * 1 2
 * 1 3 1.7
 * 3 2 0.3
 *
 * The default weight value is 1 (times the smoothing parameter passed
 * in as a command line argument). Larger weight values imply
 * stronger relationships. A negative weight implies a "repulsive"
 * relationship in which neighboring vertices would like to have
 * different assignments.
 *
 * We have provided a synthetic data generator which creates a
 * synthetic dataset for an simulated image denoising task. See the
 * synthetic_image_data application for details.
 *
 * As output the application produces another set files with a format
 * identical to the vertex prior file with each weight (probability)
 * corresponding to the posterior predictions.
 *
 * Technical Explanation
 * ========================
 *
 * This application creates a pair-wise Markov Random Field with
 * Ising-Potts edge factors and then uses residual loopy belief
 * propagation to compute posterior belief estimates for each vertex.
 *
 *
 * \author Dhruv Batra
 */

// NOTE(review): the extraction that produced this file stripped every
// angle-bracketed token, including the system/library header names
// below and template arguments throughout; the original header list
// must be recovered from version control before this compiles.
#include
#include
#include
#include
#include
#include
#include "eigen_serialization.hpp"
#include
#include

// Dense Eigen types used throughout (factors are kept in log space).
typedef Eigen::VectorXd vector;
typedef Eigen::MatrixXd matrix;

/**
 * \brief The Ising smoothing parameter which controls the coupling
 * between adjacent predictions in the graph. Larger values imply
 * greater smoothing (stronger coupling).
 *
 * \code
 * edge_factor(xi, xj) = exp( (xi == xj)? 0 : -SMOOTHING * edge_weight );
 * \endcode
 *
 * Note that the default edge weight is 1 however the graph file can
 * contain an additional edge weight column which allows per edge
 * control of the smoothing parameter.
 *
 * This parameter is set as a command line argument.
 */
double SMOOTHING = 2;

/**
 * \brief The convergence threshold for each message. Smaller values
 * imply tighter convergence but slower execution.
 *
 * The algorithm converges when:
 *
 * \code
 * sum(abs(log(old_message) - log(new_message))) < TOLERANCE
 * \endcode
 *
 * The parameter is set as a command line argument
 */
double TOLERANCE = 0.01;

/**
 * \brief The vertex data contains the vertex potential as well as the
 * current belief estimate and represents a random variable in the
 * Markov Random Field.
 *
 * The vertex potential represents the prior and is obtained from the
 * vertex prior file (stored in log form).
 *
 * The belief represents the current posterior estimate.
 */
struct vertex_data {
  // NOTE(review): factor_type is not declared in the visible part of
  // this file (only `vector`/`matrix` typedefs above) — presumably a
  // `typedef Eigen::VectorXd factor_type;` was lost in extraction.
  factor_type belief;     // current posterior estimate, in log space
  factor_type potential;  // prior from the vertex file, in log space
  // Deserialize both factors from a GraphLab archive.
  void load(graphlab::iarchive& arc) { arc >> belief >> potential; }
  // Serialize both factors to a GraphLab archive.
  void save(graphlab::oarchive& arc) const { arc << belief << potential; }
}; // end of vertex_data

/**
 * \brief The edge data represents an edge in the Markov Random Field
 * and contains the loopy belief propagation message in both
 * directions along that edge as well as the old message in each
 * direction. In addition each edge contains the weight parameter
 * used to set edge specific smoothing (default value is 1).
*/ class edge_data { /** * \brief We store old and new messages in both directions as an * array of messages. The particular message index is then computed * using the \ref message_idx function. */ factor_type messages_[4]; /** * \brief The weight associated with the edge (used to scale the * smoothing parameter) */ double weight_; /** * \brief The function used to compute the message index in the edge * message array. */ size_t message_idx(size_t source_id, size_t target_id, bool is_new) { return size_t(source_id < target_id) + 2 * size_t(is_new); } public: edge_data(const double w = 1) : weight_(w) { } const double& weight() const { return weight_; } /** * \brief Get the new message value from source_id to target_id */ factor_type& message(size_t source_id, size_t target_id) { return messages_[message_idx(source_id, target_id, true)]; } /** * \brief Get the old message value from source_id to target_id */ factor_type& old_message(size_t source_id, size_t target_id) { return messages_[message_idx(source_id, target_id, false)]; } /** * \brief Set the old message value equal to the new message value */ void update_old(size_t source_id, size_t target_id) { old_message(source_id, target_id) = message(source_id, target_id); } /** * \brief Initialize the edge data with source and target having the * appropriate number of states. 
* * \param source_id the vertex id of the source * \param nsource the number of states the source vertex takes * \param target_id the vertex id of the target * \param ntarget the number of states the target vertex takes */ void initialize(size_t source_id, size_t nsource, size_t target_id, size_t ntarget) { ASSERT_GT(nsource, 0); ASSERT_GT(ntarget, 0); message(source_id, target_id).resize(ntarget); old_message(source_id, target_id).resize(ntarget); message(target_id, source_id).resize(nsource); old_message(target_id, source_id).resize(nsource); } void save(graphlab::oarchive& arc) const { for(size_t i = 0; i < 4; ++i) arc << messages_[i]; arc << weight_; } void load(graphlab::iarchive& arc) { for(size_t i = 0; i < 4; ++i) arc >> messages_[i]; arc >> weight_; } }; // End of edge data /** * \brief The graph type used to store the Markov Random Field with * vertex data containing node potentials and beliefs and edge data * containing messages and weights. */ typedef graphlab::distributed_graph graph_type; /** * \brief The Loopy Belief Propagation Vertex Program which computes * the product of the inbound messages during the gather phase, * updates the belief during the apply phase, and then computes the * new out-bound messages during the scatter phase. * * Since the gather phase is computing the product of the inbound * messages and the messages are stored in log form the resulting sum * operation is actually a vector sum and so the gather type is simply * the factor type and the operator+= operation for the factor type is * sufficient. 
 *
 */
struct bp_vertex_program :
  public graphlab::ivertex_program< graph_type, factor_type,
                                    graphlab::messages::sum_priority >,
  public graphlab::IS_POD_TYPE {

  /**
   * \brief Since the MRF is undirected we will use all edges for gather and
   * scatter
   */
  edge_dir_type gather_edges(icontext_type& context,
                             const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  }; // end of gather_edges

  /**
   * \brief Update the old message to be the new message and collect the
   * message value.
   */
  factor_type gather(icontext_type& context, const vertex_type& vertex,
                     edge_type& edge) const {
    const vertex_type other_vertex = get_other_vertex(edge, vertex);
    edge_data& edata = edge.data();
    // Update the old message with the value of the new Message. We
    // then receive the old message during gather and then compute the
    // "cavity" during scatter (again using the old message).
    edata.update_old(other_vertex.id(), vertex.id());
    const factor_type& recv_message =
      edata.old_message(other_vertex.id(), vertex.id());
    // Ensure that the received message has the correct size
    ASSERT_EQ(recv_message.size(), vertex.data().potential.size());
    return recv_message;
  }; // end of gather function

  /**
   * \brief Multiply message product by node potential and update the
   * belief.
   */
  void apply(icontext_type& context, vertex_type& vertex,
             const factor_type& total) {
    // If we have no neighbors than the belief is equal to the
    // potential so simply update the belief
    if(vertex.num_in_edges() + vertex.num_out_edges() == 0) {
      vertex.data().belief = vertex.data().potential;
    } else {
      vertex_data& vdata = vertex.data();
      ASSERT_EQ(vdata.potential.size(), total.size());
      // Multiply (add in log space) the potential to compute the belief
      vdata.belief = vdata.potential + total;
      ASSERT_GT(vdata.belief.size(), 0);
      // Rescale the belief to ensure numerical stability. (This is
      // essentially normalization in log-space.)
      vdata.belief.array() -= vdata.belief.maxCoeff();
    }
  }; // end of apply

  /**
   * \brief Since the MRF is undirected we will use all edges for gather and
   * scatter
   */
  edge_dir_type scatter_edges(icontext_type& context,
                              const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  }; // end of scatter edges

  /**
   * \brief Compute new message value for each edge.
   */
  void scatter(icontext_type& context, const vertex_type& vertex,
               edge_type& edge) const {
    const vertex_type other_vertex = get_other_vertex(edge, vertex);
    edge_data& edata = edge.data();
    // Divide (subtract in log space) out of the belief the old in
    // message to construct the cavity
    const factor_type& old_in_message =
      edata.old_message(other_vertex.id(), vertex.id());
    ASSERT_EQ(old_in_message.size(), vertex.data().belief.size());
    factor_type cavity = vertex.data().belief - old_in_message;
    // compute the new message by convolving with the Ising-Potts Edge
    // factor.
    factor_type& new_out_message =
      edata.message(vertex.id(), other_vertex.id());
    const factor_type& old_out_message =
      edata.old_message(vertex.id(), other_vertex.id());
    convolve(cavity, edata.weight(), new_out_message);
    // Renormalize (done in log space)
    new_out_message.array() -= new_out_message.maxCoeff();
    // Apply damping to the message to stabilize convergence.
    new_out_message = DAMPING * old_out_message +
      (1-DAMPING) * new_out_message;
    // Compute message residual
    const double residual =
      (new_out_message - old_out_message).cwiseAbs().sum();
    context.clear_gather_cache(other_vertex);
    // Schedule the adjacent vertex
    if(residual > TOLERANCE) context.signal(other_vertex, residual);
  }; // end of scatter

private:

  /**
   * \brief Compute the convolution of the cavity with the Ising-Potts
   * edge potential and store the result in the message
   *
   * \param cavity the belief minus the in-bound message
   * \param weight the edge weight used to scale the smoothing parameter
   * \param [out] message The message in which to store the result of
   * the convolution.
   */
  inline void convolve(const factor_type& cavity, const double& weight,
                       factor_type& message) const {
    for(size_t i = 0; i < message.size(); ++i) {
      double sum = 0;
      for(size_t j = 0; j < cavity.size(); ++j) {
        sum += std::exp( cavity(j) + ( i == j? 0 : -(SMOOTHING*weight) ) );
      }
      // To try and ensure numerical stability we do not allow
      // messages to underflow in log-space
      // NOTE(review): template arg stripped by extraction —
      // presumably std::numeric_limits<double>::min(); verify.
      message(i) = (sum > 0)? std::log(sum) :
        std::numeric_limits::min();
    }
  } // end of convolve

  /**
   * \brief Given an edge and a vertex return the other vertex along
   * that edge.
   */
  inline vertex_type get_other_vertex(edge_type& edge,
                                      const vertex_type& vertex) const {
    return vertex.id() == edge.source().id()? edge.target() : edge.source();
  }; // end of other_vertex

}; // end of class bp_vertex_program


/**
 * \brief The vertex load is used by the graph loading API to parse
 * the lines of prior data in the vertex data file.
 *
 * This parser uses the boost::spirit library to parse the vertex data
 * file. As a consequence it is fairly flexible allowing both comma
 * and tab delimited files as well as vertices with different numbers
 * of states.
 */
bool vertex_loader(graph_type& graph, const std::string& fname,
                   const std::string& line) {
  // If the line is empty simply skip it
  if(line.empty()) return true;
  // We use the boost spirit parser which requires (too) many separate
  // namespaces so to make things clear we shorten them here.
  namespace qi = boost::spirit::qi;
  namespace ascii = boost::spirit::ascii;
  namespace phoenix = boost::phoenix;
  graphlab::vertex_id_type vid(-1);
  // NOTE(review): template arg stripped — presumably std::vector<double>.
  std::vector values;
  const bool success = qi::phrase_parse
    (line.begin(), line.end(),
     // Begin grammar: "<vid> [,] <double> [, <double> ...]"
     (
      qi::ulong_[phoenix::ref(vid) = qi::_1] >> -qi::char_(",") >>
      (qi::double_[phoenix::push_back(phoenix::ref(values), qi::_1)] %
       -qi::char_(",") )
      )
     ,
     // End grammar
     ascii::space);
  // Test to see if the boost parser was able to parse the line
  if(!success) {
    logstream(LOG_ERROR) << "Parse error in vertex prior parser."
                         << std::endl;
    return false;
  }
  // Ensure that a prior was provided. Technically this should not be
  // reached since the parser requires at least one prior entry
  if(values.empty()) {
    logstream(LOG_ERROR) << "Vertex has no prior." << std::endl;
    return false;
  }
  // Renormalize the vertex data. We require positive probabilities.
  double sum = 0;
  for(size_t i = 0; i < values.size(); ++i) {
    if(values[i] < 0) {
      logstream(LOG_ERROR) << "Encountered negative probability."
                           << std::endl;
      return false;
    }
    if(values[i] == 0) {
      logstream(LOG_ERROR)
        << "Zero probability assignments are not currently supported."
        << std::endl;
      return false;
    }
    sum += values[i];
  }
  ASSERT_GT(sum, 0);
  for(size_t i = 0; i < values.size(); ++i) values[i] /= sum;
  // Store the prior in log space.
  vertex_data vdata;
  vdata.potential.resize(values.size());
  for(size_t i = 0; i < values.size(); ++i) {
    ASSERT_GT(values[i], 0);
    vdata.potential(i) = std::log(values[i]);
  }
  graph.add_vertex(vid, vdata);
  return true;
} // end of vertex_loader;


/**
 * \brief The edge data loader is used by the GraphLab graph loading
 * API to parse lines in the edge data file.
 */
bool edge_loader(graph_type& graph, const std::string& fname,
                 const std::string& line) {
  ASSERT_FALSE(line.empty());
  namespace qi = boost::spirit::qi;
  namespace ascii = boost::spirit::ascii;
  namespace phoenix = boost::phoenix;
  graphlab::vertex_id_type source(-1), target(-1);
  double weight = 1;  // default edge weight when the column is absent
  const bool success = qi::phrase_parse
    (line.begin(), line.end(),
     // Begin grammar: "<source> [,] <target> [[,] <weight>]"
     (
      qi::ulong_[phoenix::ref(source) = qi::_1] >> -qi::char_(',') >>
      qi::ulong_[phoenix::ref(target) = qi::_1] >>
      -(-qi::char_(',') >> qi::double_[phoenix::ref(weight) = qi::_1])
      )
     ,
     // End grammar
     ascii::space);
  if(!success) return false;
  graph.add_edge(source, target, edge_data(weight));
  return true;
} // end of edge loader


/**
 * \brief The edge initializer is used to allocate the messages along
 * each edge based on the number of states of the source and target
 * vertex.
 */
void edge_initializer(graph_type::edge_type& edge) {
  edge_data& edata = edge.data();
  const graphlab::vertex_id_type source_id = edge.source().id();
  const size_t nsource = edge.source().data().potential.size();
  const graphlab::vertex_id_type target_id = edge.target().id();
  const size_t ntarget = edge.target().data().potential.size();
  // Size each directed message by the state count of its destination.
  edata.initialize(source_id, nsource, target_id, ntarget);
} // end of edge initializer


/**
 * \brief The belief prediction saver is used to save the belief
 * predictions for each vertex.
 */
struct belief_prediction_saver {
  typedef graph_type::vertex_type vertex_type;
  typedef graph_type::edge_type edge_type;
  // Emit "<vid>\t<p1>\t...\t<pk>\n" with the belief exponentiated out
  // of log space and normalized to a probability distribution.
  std::string save_vertex(const vertex_type& vertex) const {
    std::stringstream strm;
    strm << vertex.id() << '\t';
    factor_type pred = vertex.data().belief;
    double sum = 0;
    for(size_t i = 0; i < pred.size(); ++i)
      sum += (pred(i) = std::exp(pred(i)));
    pred.array() /= sum;
    for(size_t i = 0; i < pred.size(); ++i)
      strm << pred(i) << (i+1 < pred.size()? '\t' : '\n');
    return strm.str();
  }
  // Edges carry no output.
  std::string save_edge(const edge_type& edge) const {
    return ""; // nop
  }
}; // end of belief_prediction_saver


/**
 * \brief The MAP prediction saver is used to save the map estimated
 * for each vertex. The MAP estimate is the most likely assignment
 */
struct map_prediction_saver {
  typedef graph_type::vertex_type vertex_type;
  typedef graph_type::edge_type edge_type;
  // Emit "<vid>\t<argmax state>\n".
  std::string save_vertex(const vertex_type& vertex) const {
    std::stringstream strm;
    size_t prediction = 0;
    vertex.data().belief.maxCoeff(&prediction);
    strm << vertex.id() << '\t' << prediction << '\n';
    return strm.str();
  }
  // Edges carry no output.
  std::string save_edge(const edge_type& edge) const {
    return ""; // nop
  }
}; // end of map prediction_saver


int main(int argc, char** argv) {
  global_logger().set_log_level(LOG_INFO);
  global_logger().set_log_to_console(true);

  ///! Initialize control plain using mpi
  graphlab::mpi_tools::init(argc, argv);
  graphlab::distributed_control dc;

  // Parse command line options -----------------------------------------------
  // \todo update description string
  const std::string description = "Structure prediction solver";
  graphlab::command_line_options clopts(description);
  std::string prior_dir;
  std::string graph_dir;
  std::string output_dir = "pred";
  bool map = false;
  clopts.attach_option("prior", &prior_dir, prior_dir,
                       "The directory containing the prior");
  clopts.add_positional("prior");
  clopts.attach_option("graph", &graph_dir, graph_dir,
                       "The directory containing the adjacency graph");
  clopts.add_positional("graph");
  clopts.attach_option("smoothing", &SMOOTHING, SMOOTHING,
                       "The amount of smoothing (larger = more)");
  clopts.attach_option("damping", &DAMPING, DAMPING,
                       "The amount of damping (0 -> no damping and 1 -> no progress)");
  clopts.attach_option("tol", &TOLERANCE, TOLERANCE,
                       "The tolerance level for convergence.");
  clopts.attach_option("output", &output_dir, output_dir,
                       "The directory in which to save the predictions");
  clopts.attach_option("map", &map, map,
                       "Return maximizing assignment instead of the posterior distribution.");
  if(!clopts.parse(argc, argv)) {
    graphlab::mpi_tools::finalize();
    return clopts.is_set("help")? EXIT_SUCCESS : EXIT_FAILURE;
  }
  if(prior_dir.empty()) {
    logstream(LOG_ERROR) << "No prior was provided." << std::endl;
    return EXIT_FAILURE;
  }
  if(graph_dir.empty()) {
    logstream(LOG_ERROR) << "No graph was provided." << std::endl;
    return EXIT_FAILURE;
  }

  // Start the webserver
  graphlab::launch_metric_server();

  ///! load the graph
  graph_type graph(dc, clopts);

  ///! load the graph
  graph.load(prior_dir, vertex_loader);
  graph.load(graph_dir, edge_loader);
  graph.finalize();
  graph.transform_edges(edge_initializer);

  // NOTE(review): template arg stripped by extraction — presumably
  // graphlab::omni_engine<bp_vertex_program>; verify against VCS.
  typedef graphlab::omni_engine engine_type;
  engine_type engine(dc, graph, clopts, "asynchronous");
  engine.signal_all();
  graphlab::timer timer;
  engine.start();

  const double runtime = timer.current_time();
  dc.cout()
    << "----------------------------------------------------------"
    << std::endl
    << "Final Runtime (seconds): " << runtime << std::endl
    << "Updates executed: " << engine.num_updates() << std::endl
    << "Update Rate (updates/second): "
    << engine.num_updates() / runtime << std::endl;

  std::cout << "Saving predictions" << std::endl;
  const bool gzip_output = false;
  const bool save_vertices = true;
  const bool save_edges = false;
  const size_t threads_per_machine = 2;
  if(map) {
    graph.save(output_dir, map_prediction_saver(),
               gzip_output, save_vertices, save_edges,
               threads_per_machine);
  } else {
    graph.save(output_dir, belief_prediction_saver(),
               gzip_output, save_vertices, save_edges,
               threads_per_machine);
  }

  //  graphlab::stop_metric_server_on_eof();
  graphlab::stop_metric_server();
  graphlab::mpi_tools::finalize();
  return EXIT_SUCCESS;
} // end of main



================================================ FILE: toolkits/graphical_models/profile_lbp_synthetic.cpp ================================================
/*
 * Copyright (c) 2009 Carnegie Mellon University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS
 * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
* * */ /** * * \brief This application is almost identical to LBP structured * prediction except that it generates an artificial field for every * vertex enabling the studying of distributed LBP on various graph * structures without the need for actual data. * * * Technical Explanation * ======================== * * This application creates a pair-wise Markov Random Field with * Ising-Potts edge factors and then uses residual loopy belief * propagation to compute posterior belief estimates for each vertex. * * * \author Joseph Gonzalez */ #include #include #include #include #include #include #include "eigen_serialization.hpp" #include #include /** * \brief Eigen library vectors are used to store factor in _LOG * SPACE_. */ typedef Eigen::VectorXd factor_type; /** * \brief The Ising smoothing parameter which controls the coupling * between adjacent predictions in the graph. Larger values imply * greater smoothing (stronger coupling). * * \code * edge_factor(xi, xj) = exp( (xi == xj)? 0 : -SMOOTHING * edge_weight ); * \endcode * * Not that the default edge weight is 1 however the graph file can * contain an additional edge weight column which allows per edge * control of the smoothing parameter. * * This parameter is set as a command line argument. */ double SMOOTHING = 2; double FIELD = 2; size_t NSTATES = 5; /** * \brief The Damping parameter which helps ensure stable convergence. * Larger damping values lead to slower but more stable convergence. * * Currently damping is implemented in log-space in the following * equation: * * \code * log(new_message) = DAMPING * log(old_message) + * (1-DAMPING) * log(new_message); * \endcode * * This parameter is set as a command line argument. */ double DAMPING = 0.1; /** * \brief The convergence threshold for each message. Smaller values * imply tighter convergence but slower execution. 
* * * The algorithm convergence when: * * \code * sum(abs(log(old_message) - log(new_message))) < TOLERANCE * \endcode * * The parameter is set as a command line argument */ double TOLERANCE = 0.01; bool USE_CACHE = false; /** * Make a synthetic node potential */ factor_type make_node_potential(size_t vid) { // const size_t obs = vid % NSTATES; const size_t obs = 0; factor_type factor; factor.setZero(NSTATES); if(vid % 101 < 1) { for(size_t i = 0; i < NSTATES; ++i) { factor(i) = (i == obs)? 0 : -FIELD; } } return factor; } /** * \brief The vertex data contains the vertex potential as well as the * current belief estimate and represents a random variable in the * Markov Random Field. * * The vertex potential represents the prior and is obtained from the * vertex prior file (stored in log form). * * The belief represents the current posterior estimate. */ struct vertex_data { factor_type belief; void load(graphlab::iarchive& arc) { arc >> belief; } void save(graphlab::oarchive& arc) const { arc << belief; } }; // end of vertex_data /** * \brief The edge data represents an edge in the Markov Random Field * and contains the loopy belief propagation message in both * directions along that edge as well as the old message in each * direction. In addition each edge contains the weight parameter * used to set edge specific smoothing (default value is 1). */ class edge_data { /** * \brief We store old and new messages in both directions as an * array of messages. The particular message index is then computed * using the \ref message_idx function. */ factor_type messages_[4]; /** * \brief The weight associated with the edge (used to scale the * smoothing parameter) */ double weight_; /** * \brief The function used to compute the message index in the edge * message array. 
*/ size_t message_idx(size_t source_id, size_t target_id, bool is_new) { return size_t(source_id < target_id) + 2 * size_t(is_new); } public: edge_data(const double w = 1) : weight_(w) { } const double& weight() const { return weight_; } /** * \brief Get the new message value from source_id to target_id */ factor_type& message(size_t source_id, size_t target_id) { return messages_[message_idx(source_id, target_id, true)]; } /** * \brief Get the old message value from source_id to target_id */ factor_type& old_message(size_t source_id, size_t target_id) { return messages_[message_idx(source_id, target_id, false)]; } /** * \brief Set the old message value equal to the new message value */ void update_old(size_t source_id, size_t target_id) { old_message(source_id, target_id) = message(source_id, target_id); } /** * \brief Initialize the edge data with source and target having the * appropriate number of states. * * \param source_id the vertex id of the source * \param nsource the number of states the source vertex takes * \param target_id the vertex id of the target * \param ntarget the number of states the target vertex takes */ void initialize(size_t source_id, size_t nsource, size_t target_id, size_t ntarget) { ASSERT_GT(nsource, 0); ASSERT_GT(ntarget, 0); message(source_id, target_id).setZero(ntarget); old_message(source_id, target_id).setZero(ntarget); message(target_id, source_id).setZero(nsource); old_message(target_id, source_id).setZero(nsource); } void save(graphlab::oarchive& arc) const { for(size_t i = 0; i < 4; ++i) arc << messages_[i]; arc << weight_; } void load(graphlab::iarchive& arc) { for(size_t i = 0; i < 4; ++i) arc >> messages_[i]; arc >> weight_; } }; // End of edge data /** * \brief The graph type used to store the Markov Random Field with * vertex data containing node potentials and beliefs and edge data * containing messages and weights. 
*/ typedef graphlab::distributed_graph graph_type; /** * \brief The Loopy Belief Propagation Vertex Program which computes * the product of the inbound messages during the gather phase, * updates the belief during the apply phase, and then computes the * new out-bound messages during the scatter phase. * * Since the gather phase is computing the product of the inbound * messages and the messages are stored in log form the resulting sum * operation is actually a vector sum and so the gather type is simply * the factor type and the operator+= operation for the factor type is * sufficient. * */ struct bp_vertex_program : public graphlab::ivertex_program< graph_type, factor_type, graphlab::messages::sum_priority >, public graphlab::IS_POD_TYPE { /** * \brief Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges /** * \brief Update the old message to be the new message and collect the * message value. */ factor_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_type other_vertex = get_other_vertex(edge, vertex); edge_data& edata = edge.data(); // Update the old message with the value of the new Message. We // then receive the old message during gather and then compute the // "cavity" during scatter (again using the old message). edata.update_old(other_vertex.id(), vertex.id()); const factor_type& recv_message = edata.old_message(other_vertex.id(), vertex.id()); return recv_message; }; // end of gather function /** * \brief Multiply message product by node potential and update the * belief. 
*/ void apply(icontext_type& context, vertex_type& vertex, const factor_type& total) { // If we have no neighbors than the belief is equal to the // potential so simply update the belief if(vertex.num_in_edges() + vertex.num_out_edges() == 0) { vertex.data().belief = make_node_potential(vertex.id()); } else { vertex_data& vdata = vertex.data(); // Multiply (add in log space) the potential to compute the belief vdata.belief = make_node_potential(vertex.id()) + total; ASSERT_GT(vdata.belief.size(), 0); // Rescale the belief to ensure numerical stability. (This is // essentially normalization in log-space.) vdata.belief.array() -= vdata.belief.maxCoeff(); } }; // end of apply /** * \brief Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of scatter edges /** * \brief Compute new message value for each edge. */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_type other_vertex = get_other_vertex(edge, vertex); edge_data& edata = edge.data(); // Divide (subtract in log space) out of the belief the old in // message to construct the cavity const factor_type& old_in_message = edata.old_message(other_vertex.id(), vertex.id()); ASSERT_EQ(old_in_message.size(), vertex.data().belief.size()); factor_type cavity = vertex.data().belief - old_in_message; // compute the new message by convolving with the Ising-Potts Edge // factor. 
factor_type& new_out_message = edata.message(vertex.id(), other_vertex.id()); // Make a backup of the last sent message which we will use to // maintain the cache const factor_type last_sent_message = new_out_message; const factor_type& last_recv_message = edata.old_message(vertex.id(), other_vertex.id()); convolve(cavity, edata.weight(), new_out_message); // Renormalize (done in log space) new_out_message.array() -= new_out_message.maxCoeff(); // // Apply damping to the message to stabilize convergence. new_out_message = DAMPING * last_sent_message + (1-DAMPING) * new_out_message; // Compute message residual const double residual = (new_out_message - last_recv_message).cwiseAbs().sum(); if(USE_CACHE) { // context.clear_gather_cache(other_vertex); context.post_delta(other_vertex, new_out_message - last_sent_message); edata.update_old(vertex.id(), other_vertex.id()); } // Schedule the adjacent vertex if(residual > TOLERANCE) context.signal(other_vertex, residual); }; // end of scatter private: /** * \brief Compute the convolution of the cavity with the Ising-Potts * edge potential and store the result in the message * * \param cavity the belief minus the in-bound message * \param weight the edge weight used to scale the smoothing parameter * \param [out] message The message in which to store the result of * the convolution. */ inline void convolve(const factor_type& cavity, const double& weight, factor_type& message) const { for(int i = 0; i < message.size(); ++i) { double sum = 0; for(int j = 0; j < cavity.size(); ++j) { sum += std::exp( cavity(j) + ( i == j? 0 : -(SMOOTHING*weight) ) ); } // To try and ensure numerical stability we do not allow // messages to underflow in log-space message(i) = (sum > 0)? std::log(sum) : std::numeric_limits::min(); } } // end of convolve /** * \brief Given an edge and a vertex return the other vertex along * that edge. 
*/ inline vertex_type get_other_vertex(edge_type& edge, const vertex_type& vertex) const { return vertex.id() == edge.source().id()? edge.target() : edge.source(); }; // end of other_vertex }; // end of class bp_vertex_program /** * \brief The edge data loader is used by the GraphLab graph loading * API to parse lines in the edge data file. */ bool edge_loader(graph_type& graph, const std::string& fname, const std::string& line) { ASSERT_FALSE(line.empty()); namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; namespace phoenix = boost::phoenix; graphlab::vertex_id_type source(-1), target(-1); double weight = 1; const bool success = qi::phrase_parse (line.begin(), line.end(), // Begin grammar ( qi::ulong_[phoenix::ref(source) = qi::_1] >> -qi::char_(',') >> qi::ulong_[phoenix::ref(target) = qi::_1] >> -(-qi::char_(',') >> qi::double_[phoenix::ref(weight) = qi::_1]) ) , // End grammar ascii::space); if(!success) return false; if(source == target) return true; else { graph.add_edge(source, target, edge_data(weight)); return true; } } // end of edge loader /** * \brief The edge initializer is used to allocate the messages along * each edge based on the number of states of the source and target * vertex. */ void edge_initializer(graph_type::edge_type& edge) { edge_data& edata = edge.data(); const graphlab::vertex_id_type source_id = edge.source().id(); const size_t nsource = NSTATES; const graphlab::vertex_id_type target_id = edge.target().id(); const size_t ntarget = NSTATES; edata.initialize(source_id, nsource, target_id, ntarget); } // end of edge initializer /** * \brief The belief prediction saver is used to save the belief * predictions for each vertex. 
*/ struct belief_prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { std::stringstream strm; strm << vertex.id() << '\t'; factor_type pred = vertex.data().belief; double sum = 0; for(int i = 0; i < pred.size(); ++i) sum += (pred(i) = std::exp(pred(i))); pred.array() /= sum; for(int i = 0; i < pred.size(); ++i) strm << pred(i) << (i+1 < pred.size()? '\t' : '\n'); return strm.str(); } std::string save_edge(const edge_type& edge) const { return ""; // nop } }; // end of belief_prediction_saver /** * \brief The MAP prediction saver is used to save the map estimated * for each vertex. The MAP estimate is the most likely assignment */ struct map_prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { std::stringstream strm; size_t prediction = 0; vertex.data().belief.maxCoeff(&prediction); strm << vertex.id() << '\t' << prediction << '\n'; return strm.str(); } std::string save_edge(const edge_type& edge) const { return ""; // nop } }; // end of map prediction_saver int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); ///! 
Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; // Parse command line options ----------------------------------------------- // \todo update description string const std::string description = "Structure prediction solver"; graphlab::command_line_options clopts(description); std::string graph_dir; std::string output_dir = "pred"; std::string exec_type = "async"; std::string format = "tsv"; bool map = false; clopts.attach_option("graph", graph_dir, "The directory containing the adjacency graph"); clopts.add_positional("graph"); clopts.attach_option("field", FIELD, "The background field used to construct the node potentials"); clopts.attach_option("nstates", NSTATES, "The number of states for each variable"); clopts.attach_option("cache", USE_CACHE, "use gather caching"); clopts.attach_option("output", output_dir, "The directory in which to save the predictions"); clopts.attach_option("format", format, "The graph file format."); clopts.add_positional("output"); clopts.attach_option("smoothing", SMOOTHING, "The amount of smoothing (larger = more)"); clopts.attach_option("damping", DAMPING, "The amount of damping (0 -> no damping and 1 -> no progress)"); clopts.attach_option("tol", TOLERANCE, "The tolerance level for convergence."); clopts.attach_option("map", map, "Return maximizing assignment instead of the posterior distribution."); clopts.attach_option("engine", exec_type, "The type of engine to use {async, sync}."); if(!clopts.parse(argc, argv)) { graphlab::mpi_tools::finalize(); return clopts.is_set("help")? EXIT_SUCCESS : EXIT_FAILURE; } clopts.get_engine_args().set_option("use_cache", USE_CACHE); if(graph_dir.empty()) { logstream(LOG_ERROR) << "No graph was provided." << std::endl; return EXIT_FAILURE; } // Start the webserver graphlab::launch_metric_server(); ///! load the graph graph_type graph(dc, clopts); ///! 
load the graph graph.load_format(graph_dir, format); graph.finalize(); dc.cout() << "Initializing edge data" << std::endl; graph.transform_edges(edge_initializer); typedef graphlab::omni_engine engine_type; engine_type engine(dc, graph, exec_type, clopts); engine.signal_all(); graphlab::timer timer; dc.cout() << "Running engine" << std::endl; engine.start(); const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime << std::endl << "Updates executed: " << engine.num_updates() << std::endl << "Update Rate (updates/second): " << engine.num_updates() / runtime << std::endl; std::cout << "Saving predictions" << std::endl; const bool gzip_output = false; const bool save_vertices = true; const bool save_edges = false; const size_t threads_per_machine = 2; if(map) { graph.save(output_dir, map_prediction_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); } else { graph.save(output_dir, belief_prediction_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); } // graphlab::stop_metric_server_on_eof(); graphlab::stop_metric_server(); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/graphical_models/profile_lbp_synthetic2.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. 
See the License for the specific language * governing permissions and limitations under the License. * * */ /** * * \brief This application is almost identical to LBP structured * prediction except that it generates an artificial field for every * vertex enabling the studying of distributed LBP on various graph * structures without the need for actual data. * * * Technical Explanation * ======================== * * This application creates a pair-wise Markov Random Field with * Ising-Potts edge factors and then uses residual loopy belief * propagation to compute posterior belief estimates for each vertex. * * * \author Joseph Gonzalez */ #include #include #include #include #include #include #include "eigen_serialization.hpp" #include #include /** * \brief Eigen library vectors are used to store factor in _LOG * SPACE_. */ typedef Eigen::VectorXd factor_type; /** * \brief The Ising smoothing parameter which controls the coupling * between adjacent predictions in the graph. Larger values imply * greater smoothing (stronger coupling). * * \code * edge_factor(xi, xj) = exp( (xi == xj)? 0 : -SMOOTHING * edge_weight ); * \endcode * * Not that the default edge weight is 1 however the graph file can * contain an additional edge weight column which allows per edge * control of the smoothing parameter. * * This parameter is set as a command line argument. */ double SMOOTHING = 2; double FIELD = 2; size_t NSTATES = 5; /** * \brief The Damping parameter which helps ensure stable convergence. * Larger damping values lead to slower but more stable convergence. * * Currently damping is implemented in log-space in the following * equation: * * \code * log(new_message) = DAMPING * log(old_message) + * (1-DAMPING) * log(new_message); * \endcode * * This parameter is set as a command line argument. */ double DAMPING = 0.1; /** * \brief The convergence threshold for each message. Smaller values * imply tighter convergence but slower execution. 
* * * The algorithm convergence when: * * \code * sum(abs(log(old_message) - log(new_message))) < TOLERANCE * \endcode * * The parameter is set as a command line argument */ double TOLERANCE = 0.01; bool USE_CACHE = false; /** * Make a synthetic node potential */ factor_type make_node_potential(size_t vid) { // const size_t obs = vid % NSTATES; const size_t obs = 0; factor_type factor; factor.setZero(NSTATES); if(vid % 101 < 1) { for(size_t i = 0; i < NSTATES; ++i) { factor(i) = (i == obs)? 0 : -FIELD; } } return factor; } /** * \brief The vertex data contains the vertex potential as well as the * current belief estimate and represents a random variable in the * Markov Random Field. * * The vertex potential represents the prior and is obtained from the * vertex prior file (stored in log form). * * The belief represents the current posterior estimate. */ struct vertex_data { factor_type belief; void load(graphlab::iarchive& arc) { arc >> belief; } void save(graphlab::oarchive& arc) const { arc << belief; } }; // end of vertex_data /** * \brief The edge data represents an edge in the Markov Random Field * and contains the loopy belief propagation message in both * directions along that edge as well as the old message in each * direction. In addition each edge contains the weight parameter * used to set edge specific smoothing (default value is 1). */ class edge_data { /** * \brief We store old and new messages in both directions as an * array of messages. The particular message index is then computed * using the \ref message_idx function. */ factor_type messages_[2]; /** * \brief The weight associated with the edge (used to scale the * smoothing parameter) */ double weight_; /** * \brief The function used to compute the message index in the edge * message array. 
*/ size_t message_idx(size_t source_id, size_t target_id) { return size_t(source_id < target_id); } public: edge_data(const double w = 1) : weight_(w) { } const double& weight() const { return weight_; } /** * \brief Get the new message value from source_id to target_id */ factor_type& message(size_t source_id, size_t target_id) { return messages_[message_idx(source_id, target_id)]; } /** * \brief Initialize the edge data with source and target having the * appropriate number of states. * * \param source_id the vertex id of the source * \param nsource the number of states the source vertex takes * \param target_id the vertex id of the target * \param ntarget the number of states the target vertex takes */ void initialize(size_t source_id, size_t nsource, size_t target_id, size_t ntarget) { ASSERT_GT(nsource, 0); ASSERT_GT(ntarget, 0); message(source_id, target_id).setZero(ntarget); message(target_id, source_id).setZero(nsource); } void save(graphlab::oarchive& arc) const { for(size_t i = 0; i < 2; ++i) arc << messages_[i]; arc << weight_; } void load(graphlab::iarchive& arc) { for(size_t i = 0; i < 2; ++i) arc >> messages_[i]; arc >> weight_; } }; // End of edge data /** * \brief The graph type used to store the Markov Random Field with * vertex data containing node potentials and beliefs and edge data * containing messages and weights. */ typedef graphlab::distributed_graph graph_type; /** * \brief The Loopy Belief Propagation Vertex Program which computes * the product of the inbound messages during the gather phase, * updates the belief during the apply phase, and then computes the * new out-bound messages during the scatter phase. * * Since the gather phase is computing the product of the inbound * messages and the messages are stored in log form the resulting sum * operation is actually a vector sum and so the gather type is simply * the factor type and the operator+= operation for the factor type is * sufficient. 
* */ struct bp_vertex_program : public graphlab::ivertex_program< graph_type, factor_type, graphlab::messages::sum_priority >, public graphlab::IS_POD_TYPE { float residual; bp_vertex_program() : residual(0) { } /** * \brief Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; }; // end of gather_edges /** * \brief Update the old message to be the new message and collect the * message value. */ factor_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_type other_vertex = get_other_vertex(edge, vertex); edge_data& edata = edge.data(); const factor_type old_out_message = edata.message(vertex.id(), other_vertex.id()); const factor_type old_in_message = edata.message(other_vertex.id(), vertex.id()); factor_type cavity; if(other_vertex.data().belief.size() == old_out_message.size()) { cavity = other_vertex.data().belief - old_out_message; } else { cavity = make_node_potential(other_vertex.id()); } factor_type new_in_message(old_in_message.size()); convolve(cavity, edata.weight(), new_in_message); new_in_message.array() -= new_in_message.maxCoeff(); new_in_message = DAMPING * old_in_message + (1-DAMPING) * new_in_message; new_in_message.array() -= new_in_message.maxCoeff(); edata.message(other_vertex.id(), vertex.id()) = new_in_message; return new_in_message; }; // end of gather function /** * \brief Multiply message product by node potential and update the * belief. 
*/ void apply(icontext_type& context, vertex_type& vertex, const factor_type& total) { // If we have no neighbors than the belief is equal to the // potential so simply update the belief if(vertex.num_in_edges() + vertex.num_out_edges() == 0) { vertex.data().belief = make_node_potential(vertex.id()); } else { vertex_data& vdata = vertex.data(); // Multiply (add in log space) the potential to compute the belief factor_type new_belief = make_node_potential(vertex.id()) + total; ASSERT_GT(new_belief.size(), 0); // Rescale the belief to ensure numerical stability. (This is // essentially normalization in log-space.) new_belief.array() -= new_belief.maxCoeff(); if(vdata.belief.size() != new_belief.size()) { residual = 1; } else { residual = (new_belief - vdata.belief).cwiseAbs().sum();} vdata.belief = new_belief; } }; // end of apply /** * \brief Since the MRF is undirected we will use all edges for gather and * scatter */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { if(USE_CACHE || residual > TOLERANCE) return graphlab::ALL_EDGES; else return graphlab::NO_EDGES; }; // end of scatter edges /** * \brief Compute new message value for each edge. */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { const vertex_type other_vertex = get_other_vertex(edge, vertex); if(USE_CACHE) { context.clear_gather_cache(other_vertex); } // Schedule the adjacent vertex if(residual > TOLERANCE) context.signal(other_vertex, residual); }; // end of scatter private: /** * \brief Compute the convolution of the cavity with the Ising-Potts * edge potential and store the result in the message * * \param cavity the belief minus the in-bound message * \param weight the edge weight used to scale the smoothing parameter * \param [out] message The message in which to store the result of * the convolution. 
*/ inline void convolve(const factor_type& cavity, const double& weight, factor_type& message) const { for(int i = 0; i < message.size(); ++i) { double sum = 0; for(int j = 0; j < cavity.size(); ++j) { sum += std::exp( cavity(j) + ( i == j? 0 : -(SMOOTHING*weight) ) ); } // To try and ensure numerical stability we do not allow // messages to underflow in log-space message(i) = (sum > 0)? std::log(sum) : std::numeric_limits::min(); } } // end of convolve /** * \brief Given an edge and a vertex return the other vertex along * that edge. */ inline vertex_type get_other_vertex(edge_type& edge, const vertex_type& vertex) const { return vertex.id() == edge.source().id()? edge.target() : edge.source(); }; // end of other_vertex }; // end of class bp_vertex_program /** * \brief The edge data loader is used by the GraphLab graph loading * API to parse lines in the edge data file. */ bool edge_loader(graph_type& graph, const std::string& fname, const std::string& line) { ASSERT_FALSE(line.empty()); namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; namespace phoenix = boost::phoenix; graphlab::vertex_id_type source(-1), target(-1); double weight = 1; const bool success = qi::phrase_parse (line.begin(), line.end(), // Begin grammar ( qi::ulong_[phoenix::ref(source) = qi::_1] >> -qi::char_(',') >> qi::ulong_[phoenix::ref(target) = qi::_1] >> -(-qi::char_(',') >> qi::double_[phoenix::ref(weight) = qi::_1]) ) , // End grammar ascii::space); if(!success) return false; if(source == target) return true; else { graph.add_edge(source, target, edge_data(weight)); return true; } } // end of edge loader /** * \brief The edge initializer is used to allocate the messages along * each edge based on the number of states of the source and target * vertex. 
// NOTE(review): extraction collapsed this file onto single long lines, so the
// code below is left byte-identical; review findings are recorded here.
// 1. belief_prediction_saver::save_vertex exponentiates beliefs in place
//    (sum += (pred(i) = std::exp(pred(i)))); for large log-beliefs this can
//    overflow to inf — subtracting the max first (log-sum-exp) would be
//    safer. TODO confirm the magnitude range of vertex.data().belief.
// 2. The normalization loops use `int i` against Eigen's size(); presumably
//    benign, but verify there are no signed/unsigned conversion warnings
//    with the project's Eigen version.
// 3. map_prediction_saver passes &prediction (a size_t*) to maxCoeff, which
//    expects an Eigen Index-typed pointer — confirm this instantiates
//    cleanly on both LP64 and LLP64 targets.
// 4. In main(), the graph_dir.empty() error path returns EXIT_FAILURE
//    without calling graphlab::mpi_tools::finalize(), unlike the
//    parse-failure path just above it — inconsistent MPI shutdown.
// 5. Comment typo: "Initialize control plain" should read "control plane".
*/ void edge_initializer(graph_type::edge_type& edge) { edge_data& edata = edge.data(); const graphlab::vertex_id_type source_id = edge.source().id(); const size_t nsource = NSTATES; const graphlab::vertex_id_type target_id = edge.target().id(); const size_t ntarget = NSTATES; edata.initialize(source_id, nsource, target_id, ntarget); } // end of edge initializer /** * \brief The belief prediction saver is used to save the belief * predictions for each vertex. */ struct belief_prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { std::stringstream strm; strm << vertex.id() << '\t'; factor_type pred = vertex.data().belief; double sum = 0; for(int i = 0; i < pred.size(); ++i) sum += (pred(i) = std::exp(pred(i))); pred.array() /= sum; for(int i = 0; i < pred.size(); ++i) strm << pred(i) << (i+1 < pred.size()? '\t' : '\n'); return strm.str(); } std::string save_edge(const edge_type& edge) const { return ""; // nop } }; // end of belief_prediction_saver /** * \brief The MAP prediction saver is used to save the map estimated * for each vertex. The MAP estimate is the most likely assignment */ struct map_prediction_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { std::stringstream strm; size_t prediction = 0; vertex.data().belief.maxCoeff(&prediction); strm << vertex.id() << '\t' << prediction << '\n'; return strm.str(); } std::string save_edge(const edge_type& edge) const { return ""; // nop } }; // end of map prediction_saver int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); ///! 
Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; // Parse command line options ----------------------------------------------- // \todo update description string const std::string description = "Structure prediction solver"; graphlab::command_line_options clopts(description); std::string graph_dir; std::string output_dir = "pred"; std::string exec_type = "async"; std::string format = "tsv"; bool map = false; clopts.attach_option("graph", graph_dir, "The directory containing the adjacency graph"); clopts.add_positional("graph"); clopts.attach_option("field", FIELD, "The background field used to construct the node potentials"); clopts.attach_option("nstates", NSTATES, "The number of states for each variable"); clopts.attach_option("cache", USE_CACHE, "use gather caching"); clopts.attach_option("output", output_dir, "The directory in which to save the predictions"); clopts.attach_option("format", format, "The graph file format."); clopts.add_positional("output"); clopts.attach_option("smoothing", SMOOTHING, "The amount of smoothing (larger = more)"); clopts.attach_option("damping", DAMPING, "The amount of damping (0 -> no damping and 1 -> no progress)"); clopts.attach_option("tol", TOLERANCE, "The tolerance level for convergence."); clopts.attach_option("map", map, "Return maximizing assignment instead of the posterior distribution."); clopts.attach_option("engine", exec_type, "The type of engine to use {async, sync}."); if(!clopts.parse(argc, argv)) { graphlab::mpi_tools::finalize(); return clopts.is_set("help")? EXIT_SUCCESS : EXIT_FAILURE; } clopts.get_engine_args().set_option("use_cache", USE_CACHE); if(graph_dir.empty()) { logstream(LOG_ERROR) << "No graph was provided." << std::endl; return EXIT_FAILURE; } // Start the webserver graphlab::launch_metric_server(); ///! load the graph graph_type graph(dc, clopts); ///! 
load the graph graph.load_format(graph_dir, format); graph.finalize(); dc.cout() << "Initializing edge data" << std::endl; graph.transform_edges(edge_initializer); typedef graphlab::omni_engine engine_type; engine_type engine(dc, graph, exec_type, clopts); engine.signal_all(); graphlab::timer timer; dc.cout() << "Running engine" << std::endl; engine.start(); const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime << std::endl << "Updates executed: " << engine.num_updates() << std::endl << "Update Rate (updates/second): " << engine.num_updates() / runtime << std::endl; std::cout << "Saving predictions" << std::endl; const bool gzip_output = false; const bool save_vertices = true; const bool save_edges = false; const size_t threads_per_machine = 2; if(map) { graph.save(output_dir, map_prediction_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); } else { graph.save(output_dir, belief_prediction_saver(), gzip_output, save_vertices, save_edges, threads_per_machine); } // graphlab::stop_metric_server_on_eof(); graphlab::stop_metric_server(); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/graphical_models/synthetic_image_data.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. 
See the License for the specific language * governing permissions and limitations under the License. * * */ /** * This application creates a synthetic data to test and demonstrate * the structred prediction applications. The synthetic task is to * remove noise from a synthetic noisy image. * * In addition this application can be used to take the output of * the structured prediction tools and rendering the predicted noise * free image. * * * * \author Joseph Gonzalez */ #include #include #include #include #include #include #include #include using namespace cv; /** * The pixel struct encodes a pixel location and value. * * Because each pixel corresponds to vertex in the graph we need a * mapping between integers and coordinats. This is accomplished by * using the first two lowest order bytes to encode the column and the * two highest order bytes to encode the row. */ struct pixel { uint16_t row, col; double value; pixel(uint32_t ind = 0, double value = 0) : row(ind >> 16), col( ind & ((1 << 16)-1)), value(value) { } }; // end of sub2ind graphlab::vertex_id_type sub2ind(uint16_t r, uint16_t c) { ASSERT_LT(r, ((1 << 16)-1)); ASSERT_LT(c, ((1 << 16)-1)); return (r << 16) | c; }; // end of sub2ind void make_data(const uint16_t rows, const uint16_t cols, const size_t ncolors, const double error_rate, const std::string& vdata_fn, const std::string& edata_fn, const std::string& orig_img_fn, const std::string& noisy_img_fn) { const double center_r = rows / 2.0; const double center_c = cols / 2.0; const double max_radius = std::min(rows, cols) / 2.0; Mat orig_img(cols, rows, CV_8UC1); Mat noisy_img(cols, rows, CV_8UC1); std::ofstream vdata_fout(vdata_fn.c_str()); std::ofstream edata_fout(edata_fn.c_str()); for(size_t r = 0; r < rows; ++r) { for(size_t c = 0; c < cols; ++c) { // determine the true pixel id const graphlab::vertex_id_type vid = sub2ind(r,c); // Compute the true pixel value const double distance = sqrt((r-center_r)*(r-center_r) + (c-center_c)*(c-center_c)); // 
// NOTE(review): findings for synthetic_image_data.cpp (code left verbatim —
// extraction stripped the #include targets and template arguments):
// 1. Mat orig_img(cols, rows, CV_8UC1) — cv::Mat's constructor takes
//    (rows, cols); passing (cols, rows) while indexing at(r, c) with r up
//    to `rows` transposes the image and indexes out of range whenever
//    rows != cols. TODO confirm against the repository copy.
// 2. orig_img.at(r, c) / noisy_img.at(r, c) need an element-type template
//    argument (at<unsigned char>) — presumably stripped by extraction.
// 3. The vertex prior gives the observed color weight `error_rate` and
//    every other color error_rate/(ncolors-1); the weights do not sum to 1
//    and a *higher* error rate makes the observation *more* trusted —
//    presumably `1 - error_rate` was intended for the matching color.
//    Verify against the BP application this feeds.
// 4. read_data(): `size_t min_pixel(-1)` is the SIZE_MAX-sentinel idiom;
//    pred_img is likewise constructed Mat(ncols, nrows, ...) but indexed
//    at(row, col) — same transposition concern as (1).
// 5. The "// end of sub2ind" label after struct pixel is a copy-paste
//    leftover from the following function, and the struct's doc comment
//    says "bytes" where the encoding actually uses 16-bit halves.
Compute ring of sunset const uint16_t ring_color = std::floor(std::min(1.0, distance/max_radius) * (ncolors - 1) ); // Compute the true pixel color by masking with the horizon const uint16_t true_color = r < rows/2 ? ring_color : 0; // compute the predicted color const uint16_t obs_color = graphlab::random::rand01() < error_rate? graphlab::random::fast_uniform(0, ncolors-1) : true_color; const double c1p = double(true_color)/(ncolors-1); unsigned char c1 = (unsigned char)(255 * c1p > 255 ? 255 : 255 * c1p); orig_img.at(r, c) = c1; const double c2p = double(obs_color)/(ncolors-1); unsigned char c2 = (unsigned char)(255 * c2p > 255 ? 255 : 255 * c2p); noisy_img.at(r, c) = c2; // Save the prior vdata_fout << vid << '\t'; for(size_t pred = 0; pred < ncolors; ++pred) { const double prior = obs_color == pred? error_rate : (error_rate) /(ncolors - 1); vdata_fout << prior << (pred+1 < ncolors? '\t' : '\n'); } // Add the edges if(r + 1 < rows) edata_fout << vid << '\t' << sub2ind(r+1,c) << '\n'; if(c + 1 < cols) edata_fout << vid << '\t' << sub2ind(r,c+1) << '\n'; } // end of loop over cols } // end of loop over rows vdata_fout.close(); edata_fout.close(); imwrite(orig_img_fn, orig_img); imwrite(noisy_img_fn, noisy_img); } // end of make data void read_data(const std::string& pred_img_fn) { namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; namespace phoenix = boost::phoenix; std::vector pixels; std::string line; size_t line_counter = 0; uint16_t nrows = 0, ncols = 0; size_t min_pixel(-1); size_t max_pixel(0); while (std::getline(std::cin, line)) { graphlab::vertex_id_type vid(-1); std::vector values; const bool success = qi::phrase_parse (line.begin(), line.end(), // Begin grammar ( qi::ulong_[phoenix::ref(vid) = qi::_1] >> -qi::char_(",") >> (qi::double_[phoenix::push_back(phoenix::ref(values), qi::_1)] % -qi::char_(",") ) ) , // End grammar ascii::space); if(!success) { logstream(LOG_ERROR) << "Error parsing line: " << line_counter << std::endl << 
"\t\"" << line << "\""; } ASSERT_GT(values.size(), 0); const size_t pred = std::max_element(values.begin(), values.end()) - values.begin(); min_pixel = std::min(min_pixel, pred); max_pixel = std::max(max_pixel, pred); const pixel pix(vid, double(pred) / (values.size() - 1) ); pixels.push_back(pix); nrows = std::max(nrows, pix.row); ncols = std::max(ncols, pix.col); } nrows++; ncols++; std::cout << "nrows: " << nrows << std::endl << "ncols: " << ncols << std::endl << "minp: " << min_pixel << std::endl << "maxp: " << max_pixel << std::endl; Mat pred_img(ncols, nrows, CV_8UC1); for(size_t i = 0; i < pixels.size(); ++i) { int s = 255 * pixels[i].value; pred_img.at(pixels[i].row, pixels[i].col) = (unsigned char)(s >= 255 ? 255 : s); } imwrite(pred_img_fn, pred_img); } // end of make data int main(int argc, char** argv) { std::cout << "Create a synthetic noisy image." << std::endl; // Set initial values for members -------------------------------------------> size_t ncolors = 5; double error_rate = 0.5; uint16_t nrows = 200; uint16_t ncols = 200; std::string vdata_fn = "synth_vdata.tsv"; std::string edata_fn = "synth_edata.tsv"; std::string orig_img_fn = "orig_img.jpeg"; std::string noisy_img_fn = "noisy_img.jpeg"; std::string pred_img_fn; // Parse command line arguments ---------------------------------------------> graphlab::command_line_options clopts("Create synthetic prediction", false); clopts.attach_option("vdata", vdata_fn, "Vertex prior filename"); clopts.attach_option("edata", edata_fn, "Adjacency information"); clopts.attach_option("ncolors", ncolors, "The number of colors in the noisy image"); clopts.attach_option("error_rate", error_rate, "Standard deviation of noise."); clopts.attach_option("nrows", nrows, "The number of rows in the noisy image"); clopts.attach_option("ncols", ncols, "The number of columns in the noisy image"); clopts.attach_option("orig", orig_img_fn, "Original image file name."); clopts.attach_option("noisy", noisy_img_fn, "Noisy image 
file name."); clopts.attach_option("pred", pred_img_fn, "Predicted image file name."); ///! Initialize control plain using mpi const bool success = clopts.parse(argc, argv); if(!success) { return EXIT_FAILURE; } if(!pred_img_fn.empty()) { std::cout << "Reading in predictions" << std::endl; read_data(pred_img_fn); } else { std::cout << "Generating synthetic data" << std::endl; make_data(nrows, ncols, ncolors, error_rate, vdata_fn, edata_fn, orig_img_fn, noisy_img_fn); } return EXIT_SUCCESS; } // End of main // void save_image(const size_t rows, const size_t cols, // const std::vector& values, // const std::string& fname) { // using namespace Magick; // std::cout << "NPixels: " << values.size() << std::endl; // // determine the max and min colors // float max_color = -std::numeric_limits::max(); // float min_color = std::numeric_limits::max(); // foreach(pred_pair_type pair, values) { // max_color = std::max(max_color, pair.second); // min_color = std::min(min_color, pair.second); // } // Image img(Magick::Geometry(rows, cols), "white"); // // img.modifyImage(); // // Pixels img_cache(img); // // PixelPackets* pixels = img_cache. // foreach(pred_pair_type pair, values) { // std::pair coords = ind2sub(rows,cols, pair.first); // float value = (pair.second - min_color) / (max_color - min_color); // Color color(MaxRGB * value, MaxRGB * value, MaxRGB * value, 0); // img.pixelColor(coords.second, coords.first, color); // } // img.write(fname); // } ================================================ FILE: toolkits/graphical_models/utils.hpp ================================================ // utils.hpp - miscellaneous utilities // Originally from Nicol N. 
Schraudolph's isinf package // Later expanded by Dhruv Batra #ifndef UTILS_HPP #define UTILS_HPP #include #include #include #include #include #include #include #include #include #include // row-major array access #define ARR_RM(arr, r_ind, c_ind, ncols) (*(arr + r_ind*ncols + c_ind)) // col-major array access #define ARR_CM(arr, r_ind, c_ind, nrows) (*(arr + c_ind*nrows + r_ind)) // row-major ind2sub #define IND2SUB_RM(ind,r,c,ncols) \ r = floor(ind/ncols); \ c = ind % ncols; // column-major ind2sub #define IND2SUB_CM(ind,r,c,nrows) \ c = floor(ind/nrows); \ r = ind % nrows; // row-major sub2ind #define SUB2IND_RM(r,c,ncols) r*ncols + c // col-major sub2ind #define SUB2IND_CM(r,c,nrows) c*nrows + r // operators & formatted I/O for vectors // inner product template inline T operator*(const std::vector& a, const std::vector& b) { assert(a.size() == b.size()); T sum(0); for (size_t i = 0; i < a.size(); ++i) sum += a[i]*b[i]; return sum; } // element-wise sum template inline std::vector& operator+(const std::vector& a, const std::vector& b) { assert(a.size() == b.size()); std::vector sum(a.size()); for (size_t i = 0; i < a.size(); ++i) sum[i] = a[i]+b[i]; return sum; } template inline std::vector& operator+=(std::vector& a, const T& b) { typename std::vector::iterator i(a.begin()); while (i != a.end()) *i++ += b; return a; } template inline std::vector& operator-=(std::vector& a, const T& b) { typename std::vector::iterator i(a.begin()); while (i != a.end()) *i++ -= b; return a; } template inline std::vector& operator*=(std::vector& a, const T& b) { typename std::vector::iterator i(a.begin()); while (i != a.end()) *i++ *= b; return a; } template inline std::vector& operator/=(std::vector& a, const T& b) { typename std::vector::iterator i(a.begin()); while (i != a.end()) *i++ /= b; return a; } template inline std::vector& operator+=(std::vector& a, const std::vector& b) { assert(a.size() == b.size()); typename std::vector::iterator i(a.begin()); typename 
std::vector::const_iterator j(b.begin()); while (i != a.end()) *i++ += *j++; return a; } template inline std::vector& operator-=(std::vector& a, const std::vector& b) { assert(a.size() == b.size()); typename std::vector::iterator i(a.begin()); typename std::vector::const_iterator j(b.begin()); while (i != a.end()) *i++ -= *j++; return a; } template inline std::vector& operator*=(std::vector& a, const std::vector& b) { assert(a.size() == b.size()); typename std::vector::iterator i(a.begin()); typename std::vector::const_iterator j(b.begin()); while (i != a.end()) *i++ *= *j++; return a; } template inline std::vector& operator/=(std::vector& a, const std::vector& b) { assert(a.size() == b.size()); typename std::vector::iterator i(a.begin()); typename std::vector::const_iterator j(b.begin()); while (i != a.end()) *i++ /= *j++; return a; } template inline std::ostream& operator<<(std::ostream& os, const std::vector& x) { typename std::vector::const_iterator i(x.begin()); while(i != x.end()) os << *i++ << ' '; return os; } template inline std::istream& operator>>(std::istream& is, std::vector& x) { std::string s; const size_t n = x.size(); while (x.size() == n) { getline(is, s); if (is.fail()) break; std::istringstream iss(s); T item; iss >> item; while (iss.good()) { x.push_back(item); iss >> item; } if (!iss.fail()) x.push_back(item); } return is; } // Function to write a vector to file (Assumes << is defined for type T) // CHECK_NULL is provided by Danny Tarlow's Nymph Utils template void WriteToFile(std::string fname, std::vector vecx) { std::ofstream fout; fout.open(fname.c_str()); //CHECK_NULL(fout.fail(),"Could not open file for writing results\n"); fout << vecx; fout.close(); } #endif ================================================ FILE: toolkits/linear_solvers/CMakeLists.txt ================================================ project(GraphLab) # include(CheckCXXSourceCompiles) # Build als add_graphlab_executable(jacobi jacobi.cpp) requires_eigen(jacobi) # build 
and attach eigen ================================================ FILE: toolkits/linear_solvers/jacobi.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * Functionality: The code solves the linear system Ax = b using * The Jacobi algorithm. (A is a square matrix). * A assumed to be full column rank. Algorithm is described * http://en.wikipedia.org/wiki/Jacobi_method * Written by Danny Bickson */ #include "../collaborative_filtering/eigen_wrapper.hpp" #include "../collaborative_filtering/types.hpp" #include "../collaborative_filtering/eigen_serialization.hpp" #include #include enum jacobi_fields{ JACOBI_X = 0, JACOBI_REAL_X = 1, JACOBI_Y = 2, JACOBI_PREV_X = 3, JACOBI_PREC = 4 }; int actual_vector_len = 5; int data_size = 5; bool final_residual = true; bool zero = false; //allow for zero entries in sparse matrix market format bool update_function = false; double ortho_repeats = 3; std::string vecfile; int rows = 0, cols = 0; int max_iter = 10; double tol = 1e-5; int quiet = 0; int unittest = 0; struct vertex_data { vec pvec; double A_ii; //real_type y, Aii; //real_type pvec[JACOBI_X], pvec[JACOBI_REAL_X], pvec[JACOBI_PREV_X]; vertex_data(): A_ii(1) { //: y(0), Aii(1), pvec[JACOBI_X](0), pvec[JACOBI_REAL_X](0), // pvec[JACOBI_PREV_X](-1) pvec = zeros(data_size); pvec[JACOBI_PREV_X] = -1; } void save(graphlab::oarchive& arc) const { arc << pvec << 
A_ii; } /** \brief Load the vertex data from a binary archive */ void load(graphlab::iarchive& arc) { arc >> pvec >> A_ii; } }; // end of vertex_data class gather_type { public: vec pvec; double training_rmse; double validation_rmse; gather_type() { training_rmse= validation_rmse = 0; } void save(graphlab::oarchive& arc) const { arc << pvec << training_rmse << validation_rmse; } void load(graphlab::iarchive& arc) { arc >> pvec >> training_rmse >> validation_rmse; } gather_type& operator+=(const gather_type& other) { pvec += other.pvec; training_rmse += other.training_rmse; validation_rmse += other.validation_rmse; return *this; } }; //gather_type ret; struct edge_data : public graphlab::IS_POD_TYPE { double obs; int role; enum data_role_type { TRAIN, VALIDATE, PREDICT }; edge_data(double obs = 1, data_role_type role = TRAIN) : obs(obs), role(role) { } }; // end of edge data /** * \brief The graph type is defined in terms of the vertex and edge * data. */ typedef graphlab::distributed_graph graph_type; graph_type * pgraph; /** * \brief Given a vertex and an edge return the other vertex in the * edge. */ inline graph_type::vertex_type get_other_vertex(graph_type::edge_type& edge, const graph_type::vertex_type& vertex) { return vertex.id() == edge.source().id()? edge.target() : edge.source(); }; // end of get_other_vertex //typedef double gather_type; typedef double message_type; void start_engine(); #include "../collaborative_filtering/math.hpp" void verify_values(int unittest, double residual){ if (unittest == 1) assert(residual < 1e-5); } /** * \brief The graph loader function is a line parser used for * distributed graph construction. 
*/ inline bool graph_loader(graph_type& graph, const std::string& filename, const std::string& line) { //no need to parse if (boost::algorithm::ends_with(filename ,vecfile)) return true; ASSERT_FALSE(line.empty()); // Determine the role of the data edge_data::data_role_type role = edge_data::TRAIN; // Parse the line std::stringstream strm(line); graph_type::vertex_id_type source_id(-1), target_id(-1); float obs(0); strm >> source_id >> target_id; source_id--; target_id--; if (source_id >= (uint)rows) logstream(LOG_FATAL)<<"Row number: " << source_id << " sould be < rows " << rows << " [ line: " << line << " ] " << std::endl; if (target_id >= (uint)cols) logstream(LOG_FATAL)<<"Col number: " << target_id << " sould be < cols " << cols << " [ line: " << line << " ] " << std::endl; strm >> obs; if (!info.is_square()) target_id = rows + target_id; if (source_id == target_id){ vertex_data data; data.A_ii = obs; data.pvec[JACOBI_PREC] = obs; graph.add_vertex(source_id, data); } // Create an edge and add it to the graph else graph.add_edge(source_id, target_id, edge_data(obs, role)); return true; // successful load } // end of graph_loader #include "../collaborative_filtering/math.hpp" //uses vertex_data and edge_data so has to be included here #include "../collaborative_filtering/printouts.hpp" // the same typedef graphlab::omni_engine engine_type; engine_type * pengine = NULL; struct linear_model_saver { typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; int pos; linear_model_saver(int pos): pos(pos) {} std::string save_vertex(const vertex_type& vertex) const { assert(pos >= 0 && pos < vertex.data().pvec.size()); std::string ret; ret = boost::lexical_cast(vertex.id() + 1) + " "; ret += boost::lexical_cast(vertex.data().pvec[pos]) + "\n"; return ret; } std::string save_edge(const edge_type& edge) const { return ""; } }; void start_engine(){ vertex_set nodes = pgraph->select(selected_node); pengine->signal_vset(nodes); pengine->start(); } 
// NOTE(review): findings for jacobi.cpp (code left verbatim):
// 1. "../collaborative_filtering/math.hpp" is #include'd twice — once after
//    the forward declaration of start_engine() and again after
//    graph_loader(). Harmless only if that header guards itself — verify.
// 2. Later in main(), `dc.cout() << "Jacobi finished in " << runtime` uses
//    `runtime` before `const double runtime = timer.current_time();` is
//    declared — as shown this cannot compile; presumably the declaration
//    was displaced by extraction. Verify against the repository copy.
// 3. graph_loader(): `source_id--` / `target_id--` on unsigned ids wraps to
//    a huge value when a line fails to parse (the ids stay (-1)); the
//    range checks only catch this via the unsigned comparison. The fatal
//    log messages also misspell "should" as "sould" (runtime strings —
//    deliberately not altered in this review pass).
// 4. verify_values() relies on assert(), which compiles out under NDEBUG,
//    so --unittest=1 silently passes in release builds.
// 5. `ortho_repeats` is declared double but is semantically a repeat count
//    passed to init_math() — confirm the expected parameter type.
int main(int argc, char** argv) { global_logger().set_log_to_console(true); // Parse command line options ----------------------------------------------- const std::string description = "Solve a linear system using Jacobi method"; graphlab::command_line_options clopts(description); std::string input_dir, output_dir; std::string exec_type = "synchronous"; clopts.attach_option("matrix", input_dir, "The directory containing the matrix file"); clopts.add_positional("matrix"); clopts.attach_option("initial_vector", vecfile,"optional initial vector"); clopts.attach_option("debug", debug, "Display debug output."); clopts.attach_option("unittest", unittest, "unit testing 0=None, 1=3x3 matrix"); clopts.attach_option("max_iter", max_iter, "max iterations"); clopts.attach_option("regularization", regularization, "regularization"); clopts.attach_option("tol", tol, "convergence threshold"); clopts.attach_option("rows", rows, "number of rows"); clopts.attach_option("cols", cols, "number of cols"); clopts.attach_option("quiet", quiet, "quiet mode (less verbose)"); if(!clopts.parse(argc, argv) || input_dir == "") { std::cout << "Error in parsing command line arguments." << std::endl; clopts.print_description(); return EXIT_FAILURE; } if (quiet){ global_logger().set_log_level(LOG_ERROR); debug = false; } if (rows <= 0 || cols <= 0 || rows != cols) logstream(LOG_FATAL)<<"Please specify number of rows/cols of the input matrix" << std::endl; info.rows = rows; info.cols = cols; graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; graph_type graph(dc, clopts); graph.load(input_dir, graph_loader); pgraph = &graph; dc.cout() << "Loading graph. Finished in " << timer.current_time() << std::endl; dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. 
Finished in " << timer.current_time() << std::endl; dc.cout() << "========== Graph statistics on proc " << dc.procid() << " ===============" << "\n Num vertices: " << graph.num_vertices() << "\n Num edges: " << graph.num_edges() << "\n Num replica: " << graph.num_replicas() << "\n Replica to vertex ratio: " << float(graph.num_replicas())/graph.num_vertices() << "\n --------------------------------------------" << "\n Num local own vertices: " << graph.num_local_own_vertices() << "\n Num local vertices: " << graph.num_local_vertices() << "\n Replica to own ratio: " << (float)graph.num_local_vertices()/graph.num_local_own_vertices() << "\n Num local edges: " << graph.num_local_edges() //<< "\n Begin edge id: " << graph.global_eid(0) << "\n Edge balance ratio: " << float(graph.num_local_edges())/graph.num_edges() << std::endl; dc.cout() << "Creating engine" << std::endl; engine_type engine(dc, graph, exec_type, clopts); pengine = &engine; init_math(&graph, info, ortho_repeats, update_function); if (vecfile.size() > 0){ std::cout << "Load b vector from file" << input_dir << vecfile << std::endl; FILE * file = fopen((input_dir + vecfile).c_str(), "r"); if (file == NULL) logstream(LOG_FATAL)<<"Failed to open initial vector"<< std::endl; vec input = vec::Zero(rows); double val = 0; for (int i=0; i< rows; i++){ int rc = fscanf(file, "%lg\n", &val); if (rc != 1) logstream(LOG_FATAL)<<"Failed to open initial vector"<< std::endl; input[i] = val; } fclose(file); DistVec v0(info, JACOBI_Y, false, "v0"); v0 = input; } dc.cout() << "Running Jacobi" << std::endl; dc.cout() << "(C) Code by Danny Bickson, CMU " << std::endl; dc.cout() << "Please send bug reports to danny.bickson@gmail.com" << std::endl; timer.start(); DistMat A(info); DistVec b(info, JACOBI_Y,true, "b"); DistVec x(info, JACOBI_X,true, "x", JACOBI_PREV_X); DistVec A_ii(info, JACOBI_PREC, true, "A_ii"); PRINT_VEC(b); PRINT_VEC(x); PRINT_VEC(A_ii); for (int i=0; i < max_iter; i++){ mi.use_diag = false; x = (b - 
A*x)/A_ii; PRINT_VEC(x); } dc.cout() << "Jacobi finished in " << runtime << std::endl; dc.cout() << "\t Updates: " << engine.num_updates() << std::endl; DistVec p(info, JACOBI_PREV_X, true, "p"); mi.use_diag = true; p = A*x -b; PRINT_VEC(p); DistDouble ret = norm(p); dc.cout() << "Solution converged to residual: " << ret.toDouble() << std::endl; //vec ret = fill_output(&core.graph(), info, JACOBI_X); //write_output_vector(datafile + "x.out", format, ret, false); const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime << std::endl << "Updates executed: " << engine.num_updates() << std::endl << "Update Rate (updates/second): " << engine.num_updates() / runtime << std::endl; graph.save("x.out", linear_model_saver(JACOBI_X), false, true, false, 1); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } ================================================ FILE: toolkits/linear_solvers/linear_solvers.dox ================================================ /** \page linear_solvers Linear iterative solver \brief GraphLab linear solver library is used for solving the linear system Ax = b. \section Jacobi The Jacobi algorithm is one of the simplest methods, it works by applying repeatedly the update rule: \verbatim x = (b-(A-diag(diag(A))*x) ./ diag(A) \endverbatim \section Input The input folder is given using the command line --matrix=folder_name. Inside this folder should have a sparse matrix A file with the format, in each line. \verbatim row col val \endverbatim Additional input vector b is given using the command --input_vector=filename, this file should be found inside the folder given by --matrix. \section Output The output of the computation is a solution vector x. File name is x.out_1_of_1. \section Jacobi example. 
Assume we have the linear system \verbatim A=[ 1.8147 0.9134 0.2785 0.9058 1.6324 0.5469 0.1270 0.0975 1.9575 ]; b= [ 0.9649 0.1576 0.9706 ]'; \endverbatim The solution will be \verbatim x = A \ b = [ 0.6803 -0.4396 0.4736 ]'; \endverbatim To run it in GraphLab we prepare a folder named jacobi_testA, inside it we have the input file for A named A: \verbatim 1 1 1.8147 1 2 0.9134 1 3 0.2785 2 1 0.9058 2 2 1.6324 2 3 0.5469 3 1 0.127 3 2 0.0975 3 3 1.9575 \endverbatim And the input file for b (called vecB) \verbatim 0.9649 0.1576 0.9706 \endverbatim Note: both A and vecB files are found under jacobi_testA/ folder. Now we run: \verbatim ./jacobi --matrix=jacobi_testA/ --initial_vector=vecB --rows=3 --cols=3 --debug=1 --max_iter=10 Running Jacobi (C) Code by Danny Bickson, CMU Please send bug reports to danny.bickson@gmail.com Solution converged to residual: 0.00507232 ---------------------------------------------------------- Final Runtime (seconds): 0.649031 Updates executed: 33 Update Rate (updates/second): 50.845 \endverbatim We examine the output: \verbatim $ cat x.out_1_of_1 1 0.67806771486642969 2 -0.43984914015767995 3 0.47337333903573475 \endverbatim */ ================================================ FILE: toolkits/toolkits.dox ================================================ /** \page toolkits GraphLab Toolkits To enable users to use GraphLab out-of-the-box and to demonstrate the power of the GraphLab API we have implemented a collection of applications to address a wide range of standard tasks in large-scale graph computation. We have implemented the following toolkits \li \subpage topic_modeling contains applications like LDA which can be used to cluster documents and extract topical representations. \li \subpage graph_algorithms contains algorithms mostly from the Social Network Analysis Handbook algorithms set. 
\li \subpage graph_analytics contains applications like pagerank and triangle counting which can be applied to general graphs to estimate community structure. \li \subpage clustering contains standard data clustering tools such as Kmeans \li \subpage collaborative_filtering contains a collection of applications used to make predictions about users' interests and factorize large matrices. \li \subpage graphical_models contains tools for making joint predictions about collections of related random variables. \li \subpage factor_graphs contains Belief Propagation implementation for factor graphs \li \subpage linear_solvers contains solvers for linear systems of equations - currently the Jacobi algorithm is implemented \li \subpage computer_vision contains a collection of tools for reasoning about images. */ ================================================ FILE: toolkits/topic_modeling/CMakeLists.txt ================================================ project(GraphLab) # Primary executable add_graphlab_executable(lda_sequential_cgs lda_sequential_cgs.cpp) add_graphlab_executable(cgs_lda cgs_lda.cpp) add_graphlab_executable(cgs_lda_mimno_experimental cgs_lda_mimno_experimental.cpp) ================================================ FILE: toolkits/topic_modeling/cgs_lda.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ /** * \file cgs_lda.cpp * * \brief This file contains a GraphLab based implementation of the * Collapsed Gibbs Sampler (CGS) for the Latent Dirichlet Allocation * (LDA) model. * * * * \author Joseph Gonzalez, Diana Hu */ #include #include #include #include #include #include #include #include #include #include #include #include // Global Types // ============================================================================ typedef long count_type; /** * \brief The factor type is used to store the counts of tokens in * each topic for words, documents, and assignments. * * Atomic counts are used because we violate the abstraction by * modifying adjacent vertex data on scatter. As a consequence * multiple threads on the same machine may try to update the same * vertex data at the same time. The graphlab::atomic type ensures * that multiple increments are serially consistent. */ typedef std::vector< graphlab::atomic > factor_type; /** * \brief We use the factor type in accumulators and so we define an * operator+= */ inline factor_type& operator+=(factor_type& lvalue, const factor_type& rvalue) { if(!rvalue.empty()) { if(lvalue.empty()) lvalue = rvalue; else { for(size_t t = 0; t < lvalue.size(); ++t) lvalue[t] += rvalue[t]; } } return lvalue; } // end of operator += // We include the rest of GraphLab after we define the operator+= for // vector. #include #include /** * \brief The latent topic id of a token is the smallest reasonable * type. */ typedef uint16_t topic_id_type; // We require a null topic to represent the topic assignment for // tokens that have not yet been assigned. #define NULL_TOPIC (topic_id_type(-1)) /** * \brief The assignment type is used on each edge to store the * assignments of each token. There can be several occurrences of the * same word in a given document and so a vector is used to store the * assignments of each occurrence. 
*/ typedef std::vector< topic_id_type > assignment_type; // Global Variables // ============================================================================ /** * \brief The alpha parameter determines the sparsity of topics for * each document. */ double ALPHA = 1; /** * \brief the Beta parameter determines the sparsity of words in each * document. */ double BETA = 0.1; /** * \brief the total number of topics to uses */ size_t NTOPICS = 50; /** * \brief The total number of words in the dataset. */ size_t NWORDS = 0; /** * \brief The total number of docs in the dataset. */ size_t NDOCS = 0; /** * \brief The total number of tokens in the corpus */ size_t NTOKENS = 0; /** * \brief The number of top words to display during execution (from * each topic). */ size_t TOPK = 5; /** * \brief The interval to display topics during execution. */ size_t INTERVAL = 10; /** * \brief The interval to compute & display the likelihood */ size_t LIK_INTERVAL = 5; /** * \brief The global variable storing the global topic count across * all machines. This is maintained periodically using aggregation. */ factor_type GLOBAL_TOPIC_COUNT; /** * \brief A dictionary of words used to print the top words during * execution. */ std::vector DICTIONARY; /** * \brief The maximum occurences allowed for an individual term-doc * pair. (edge data) */ size_t MAX_COUNT = 100; /** * \brief The time to run until the first sample is taken. If less * than zero then the sampler will run indefinitely. */ float BURNIN = -1; /** * \brief The json top word struct contains the current set of top * words for each topic encoded in the form of a json string. 
*/ struct top_words_type { graphlab::mutex lock; std::string json_string; top_words_type() : json_string("{\n" + json_header_string() + "\tvalues: [] \n }") { } inline std::string json_header_string() const { return "\t\"ntopics\": " + graphlab::tostr(NTOPICS) + ",\n" + "\t\"nwords\": " + graphlab::tostr(NWORDS) + ",\n" + "\t\"ndocs\": " + graphlab::tostr(NDOCS) + ",\n" + "\t\"ntokens\": " + graphlab::tostr(NTOKENS) + ",\n" + "\t\"alpha\": " + graphlab::tostr(ALPHA) + ",\n" + "\t\"beta\": " + graphlab::tostr(BETA) + ",\n"; } // end of json header string } TOP_WORDS; /** * \brief This method is called by the web interface to construct and * return the word clouds. */ std::pair word_cloud_callback(std::map& varmap) { TOP_WORDS.lock.lock(); const std::pair pair("text/html",TOP_WORDS.json_string); TOP_WORDS.lock.unlock(); return pair; } /** * \brief Create a token changes event tracker which is reported in * the GraphLab metrics dashboard. */ DECLARE_EVENT(TOKEN_CHANGES); // Graph Types // ============================================================================ /** * \brief The vertex data represents each term and document in the * corpus and contains the counts of tokens in each topic. */ struct vertex_data { ///! The total number of updates uint32_t nupdates; ///! The total number of changes to adjacent tokens uint32_t nchanges; ///! The count of tokens in each topic factor_type factor; vertex_data() : nupdates(0), nchanges(0), factor(NTOPICS) { } void save(graphlab::oarchive& arc) const { arc << nupdates << nchanges << factor; } void load(graphlab::iarchive& arc) { arc >> nupdates >> nchanges >> factor; } }; // end of vertex_data /** * \brief The edge data represents the individual tokens (word,doc) * pairs and their assignment to topics. */ struct edge_data { ///! The number of changes on the last update uint16_t nchanges; ///! 
// NOTE(review): findings for cgs_lda.cpp (code left verbatim):
// 1. eparser(): `if(next_char_ptr ==NULL) return false;` can never fire —
//    strtoul always stores a non-NULL end pointer when one is supplied.
//    To detect a line with no leading count, compare
//    next_char_ptr == line.c_str() instead.
// 2. operator+=(factor_type&, const factor_type&) assumes equal sizes when
//    both sides are non-empty (no size check) — out-of-range reads of
//    rvalue if topic counts ever disagree across machines.
// 3. top_words_type builds "\tvalues: [] \n }" with an unquoted `values`
//    key, so the emitted string is not strict JSON (runtime string —
//    deliberately not altered in this review pass).
// 4. word_cloud_callback correctly copies json_string under TOP_WORDS.lock
//    before returning; the pair is constructed while the lock is held.
The assignment of all tokens assignment_type assignment; edge_data(size_t ntokens = 0) : nchanges(0), assignment(ntokens, NULL_TOPIC) { } void save(graphlab::oarchive& arc) const { arc << nchanges << assignment; } void load(graphlab::iarchive& arc) { arc >> nchanges >> assignment; } }; // end of edge_data /** * \brief The LDA graph is a bipartite graph with docs connected to * terms if the term occurs in the document. * * The edges store the number of occurrences of the term in the * document as a vector of the assignments of that term in that * document to topics. * * The vertices store the total topic counts. */ typedef graphlab::distributed_graph graph_type; /** * \brief Edge data parser used in graph.load_json * * Make sure that the edge file list * has docids from -2 to -(total #docid) and wordids 0 to (total #words -1) */ bool eparser(edge_data& ed, const std::string& line){ const int BASE = 10; char* next_char_ptr = NULL; size_t count = strtoul(line.c_str(), &next_char_ptr, BASE); if(next_char_ptr ==NULL) return false; //threshold count count = std::min(count, MAX_COUNT); ed = (edge_data(count)); return true; } /** * \brief Vertex data parser used in graph.load_json */ bool vparser(vertex_data& vd, const std::string& line){ vd = vertex_data(); return true; } /** * \brief The graph loader is used by graph.load to parse lines of the * text data file. * * The global variable MAX_COUNT limits the number of tokens that can * be constructed on a particular edge. * * We use the relativley fast boost::spirit parser to parse each line. 
*/ bool graph_loader(graph_type& graph, const std::string& fname, const std::string& line) { ASSERT_FALSE(line.empty()); namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; namespace phoenix = boost::phoenix; graphlab::vertex_id_type doc_id(-1), word_id(-1); size_t count = 0; const bool success = qi::phrase_parse (line.begin(), line.end(), // Begin grammar ( qi::ulong_[phoenix::ref(doc_id) = qi::_1] >> -qi::char_(',') >> qi::ulong_[phoenix::ref(word_id) = qi::_1] >> -qi::char_(',') >> qi::ulong_[phoenix::ref(count) = qi::_1] ) , // End grammar ascii::space); if(!success) return false; // Threshold the count count = std::min(count, MAX_COUNT); // since this is a bipartite graph I need a method to number the // left and right vertices differently. To accomplish I make sure // all vertices have non-zero ids and then negate the right vertex. // Unfortunatley graphlab reserves -1 and so we add 2 and negate. doc_id += 2; ASSERT_GT(doc_id, 1); doc_id = -doc_id; ASSERT_NE(doc_id, word_id); // Create an edge and add it to the graph graph.add_edge(doc_id, word_id, edge_data(count)); return true; // successful load }; // end of graph loader /** * \brief Determine if the given vertex is a word vertex or a doc * vertex. * * For simplicity we connect docs --> words and therefore if a vertex * has in edges then it is a word. */ inline bool is_word(const graph_type::vertex_type& vertex) { return vertex.num_in_edges() > 0 ? 1 : 0; } /** * \brief Determine if the given vertex is a doc vertex * * For simplicity we connect docs --> words and therefore if a vertex * has out edges then it is a doc */ inline bool is_doc(const graph_type::vertex_type& vertex) { return vertex.num_out_edges() > 0 ? 1 : 0; } /** * \brief return the number of tokens on a particular edge. */ inline size_t count_tokens(const graph_type::edge_type& edge) { return edge.data().assignment.size(); } /** * \brief Get the other vertex in the edge. 
*/ inline graph_type::vertex_type get_other_vertex(const graph_type::edge_type& edge, const graph_type::vertex_type& vertex) { return vertex.id() == edge.source().id()? edge.target() : edge.source(); } // ======================================================== // The Collapsed Gibbs Sampler Function /** * \brief The gather type for the collapsed Gibbs sampler is used to * collect the topic counts on adjacent edges so that the apply * function can compute the correct topic counts for the center * vertex. * */ struct gather_type { factor_type factor; uint32_t nchanges; gather_type() : nchanges(0) { }; gather_type(uint32_t nchanges) : factor(NTOPICS), nchanges(nchanges) { }; void save(graphlab::oarchive& arc) const { arc << factor << nchanges; } void load(graphlab::iarchive& arc) { arc >> factor >> nchanges; } gather_type& operator+=(const gather_type& other) { factor += other.factor; nchanges += other.nchanges; return *this; } }; // end of gather type /** * \brief The collapsed Gibbs sampler vertex program updates the topic * counts for the center vertex and then draws new topic assignments * for each edge durring the scatter phase. * */ class cgs_lda_vertex_program : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: /** * \brief At termination we want to disable sampling to allow the * correct final counts to be computed. */ static bool DISABLE_SAMPLING; /** \brief gather on all edges */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } // end of gather_edges /** * \brief Collect the current topic count on each edge. 
*/ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { gather_type ret(edge.data().nchanges); const assignment_type& assignment = edge.data().assignment; foreach(topic_id_type asg, assignment) { if(asg != NULL_TOPIC) ++ret.factor[asg]; } return ret; } // end of gather /** * \brief Update the topic count for the center vertex. This * ensures that the center vertex has the correct topic count before * resampling the topics for each token along each edge. */ void apply(icontext_type& context, vertex_type& vertex, const gather_type& sum) { const size_t num_neighbors = vertex.num_in_edges() + vertex.num_out_edges(); ASSERT_GT(num_neighbors, 0); // There should be no new edge data since the vertex program has been cleared vertex_data& vdata = vertex.data(); ASSERT_EQ(sum.factor.size(), NTOPICS); ASSERT_EQ(vdata.factor.size(), NTOPICS); vdata.nupdates++; vdata.nchanges = sum.nchanges; vdata.factor = sum.factor; } // end of apply /** * \brief Scatter on all edges if the computation is on-going. * Computation stops after bunrin or when disable sampling is set to * true. */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return (DISABLE_SAMPLING || (BURNIN > 0 && context.elapsed_seconds() > BURNIN))? graphlab::NO_EDGES : graphlab::ALL_EDGES; }; // end of scatter edges /** * \brief Draw new topic assignments for each edge token. * * Note that we exploit the GraphLab caching model here by DIRECTLY * modifying the topic counts of adjacent vertices. Making the * changes immediately visible to any adjacent vertex programs * running on the same machine. However, these changes will be * overwritten during the apply step and are only used to accelerate * sampling. This is a potentially dangerous violation of the * abstraction and should be taken with caution. In our case all * vertex topic counts are preallocated and atomic operations are * used. 
In addition during the sampling phase we must be careful * to guard against potentially negative temporary counts. */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { factor_type& doc_topic_count = is_doc(edge.source()) ? edge.source().data().factor : edge.target().data().factor; factor_type& word_topic_count = is_word(edge.source()) ? edge.source().data().factor : edge.target().data().factor; ASSERT_EQ(doc_topic_count.size(), NTOPICS); ASSERT_EQ(word_topic_count.size(), NTOPICS); // run the actual gibbs sampling std::vector prob(NTOPICS); assignment_type& assignment = edge.data().assignment; edge.data().nchanges = 0; foreach(topic_id_type& asg, assignment) { const topic_id_type old_asg = asg; if(asg != NULL_TOPIC) { // construct the cavity --doc_topic_count[asg]; --word_topic_count[asg]; --GLOBAL_TOPIC_COUNT[asg]; } for(size_t t = 0; t < NTOPICS; ++t) { const double n_dt = std::max(count_type(doc_topic_count[t]), count_type(0)); const double n_wt = std::max(count_type(word_topic_count[t]), count_type(0)); const double n_t = std::max(count_type(GLOBAL_TOPIC_COUNT[t]), count_type(0)); prob[t] = (ALPHA + n_dt) * (BETA + n_wt) / (BETA * NWORDS + n_t); } asg = graphlab::random::multinomial(prob); // asg = std::max_element(prob.begin(), prob.end()) - prob.begin(); ++doc_topic_count[asg]; ++word_topic_count[asg]; ++GLOBAL_TOPIC_COUNT[asg]; if(asg != old_asg) { ++edge.data().nchanges; INCREMENT_EVENT(TOKEN_CHANGES,1); } } // End of loop over each token // singla the other vertex context.signal(get_other_vertex(edge, vertex)); } // end of scatter function }; // end of cgs_lda_vertex_program bool cgs_lda_vertex_program::DISABLE_SAMPLING = false; /** * \brief The icontext type associated with the cgs_lda_vertex program * is needed for all aggregators. 
*/ typedef cgs_lda_vertex_program::icontext_type icontext_type; // ======================================================== // Aggregators /** * \brief The topk aggregator is used to periodically compute and * display the topk most common words in each topic. * * The number of words is determined by the global variable \ref TOPK * and the interval is determined by the global variable \ref INTERVAL. * */ class topk_aggregator { typedef std::pair cw_pair_type; private: std::vector< std::set > top_words; size_t nchanges, nupdates; public: topk_aggregator(size_t nchanges = 0, size_t nupdates = 0) : nchanges(nchanges), nupdates(nupdates) { } void save(graphlab::oarchive& arc) const { arc << top_words << nchanges; } void load(graphlab::iarchive& arc) { arc >> top_words >> nchanges; } topk_aggregator& operator+=(const topk_aggregator& other) { nchanges += other.nchanges; nupdates += other.nupdates; if(other.top_words.empty()) return *this; if(top_words.empty()) top_words.resize(NTOPICS); for(size_t i = 0; i < top_words.size(); ++i) { // Merge the topk top_words[i].insert(other.top_words[i].begin(), other.top_words[i].end()); // Remove excess elements while(top_words[i].size() > TOPK) top_words[i].erase(top_words[i].begin()); } return *this; } // end of operator += static topk_aggregator map(icontext_type& context, const graph_type::vertex_type& vertex) { topk_aggregator ret_value; const vertex_data& vdata = vertex.data(); ret_value.nchanges = vdata.nchanges; ret_value.nupdates = vdata.nupdates; if(is_word(vertex)) { const graphlab::vertex_id_type wordid = vertex.id(); ret_value.top_words.resize(vdata.factor.size()); for(size_t i = 0; i < vdata.factor.size(); ++i) { const cw_pair_type pair(vdata.factor[i], wordid); ret_value.top_words[i].insert(pair); } } return ret_value; } // end of map function static void finalize(icontext_type& context, const topk_aggregator& total) { if(context.procid() != 0) return; std::string json = "{\n"+ TOP_WORDS.json_header_string() + 
"\t\"values\": [\n"; for(size_t i = 0; i < total.top_words.size(); ++i) { std::cout << "Topic " << i << ": "; json += "\t[\n"; size_t counter = 0; rev_foreach(cw_pair_type pair, total.top_words[i]) { ASSERT_LT(pair.second, DICTIONARY.size()); json += "\t\t[\"" + DICTIONARY[pair.second] + "\", " + graphlab::tostr(pair.first) + "]"; if(++counter < total.top_words[i].size()) json += ", "; json += '\n'; std::cout << DICTIONARY[pair.second] << "(" << pair.first << ")" << ", "; // std::cout << DICTIONARY[pair.second] << ", "; } json += "\t]"; if(i+1 < total.top_words.size()) json += ", "; json += '\n'; std::cout << std::endl; } json += "]}"; // Post the change to the global variable TOP_WORDS.lock.lock(); TOP_WORDS.json_string.swap(json); TOP_WORDS.lock.unlock(); std::cout << "\nNumber of token changes: " << total.nchanges << std::endl; std::cout << "\nNumber of updates: " << total.nupdates << std::endl; } // end of finalize }; // end of topk_aggregator struct /** * \brief The global counts aggregator computes the total number of * tokens in each topic across all words and documents and then * updates the \ref GLOBAL_TOPIC_COUNT variable. * */ struct global_counts_aggregator { typedef graph_type::vertex_type vertex_type; static factor_type map(icontext_type& context, const vertex_type& vertex) { return vertex.data().factor; } // end of map function static void finalize(icontext_type& context, const factor_type& total) { size_t sum = 0; for(size_t t = 0; t < total.size(); ++t) { GLOBAL_TOPIC_COUNT[t] = std::max(count_type(total[t]/2), count_type(0)); sum += GLOBAL_TOPIC_COUNT[t]; } context.cout() << "Total Tokens: " << sum << std::endl; } // end of finalize }; // end of global_counts_aggregator struct /** * Computing log_gamma can be a bit slow so this class precomptues * log gamma for a subset of values. 
*/ class log_gamma { double offset; std::vector values; public: log_gamma(): offset(1.0) {} void init(const double& new_offset, const size_t& buckets) { using boost::math::lgamma; ASSERT_GT(offset, 0.0); values.resize(buckets); offset = new_offset; for(size_t i = 0; i < values.size(); ++i) { values[i] = lgamma(i + offset); } } double operator()(const count_type& index) const { using boost::math::lgamma; if(index < values.size() && index >= 0) { return values[index]; } else { return lgamma(index + offset); } } }; log_gamma ALPHA_LGAMMA; log_gamma BETA_LGAMMA; /** * \brief The Likelihood aggregators maintains the current estimate of * the log-likelihood of the current token assignments. * * llik_words_given_topics = ... * ntopics * (gammaln(nwords * beta) - nwords * gammaln(beta)) - ... * sum_t(gammaln( n_t + nwords * beta)) + * sum_w(sum_t(gammaln(n_wt + beta))); * * llik_topics = ... * ndocs * (gammaln(ntopics * alpha) - ntopics * gammaln(alpha)) + ... * sum_d(sum_t(gammaln(n_td + alpha)) - gammaln(sum_t(n_td) + ntopics * alpha)); * * Latex formulation: * \mathcal{L}( w | z) & = T * \left( \log\Gamma(W * \beta) - W * \log\Gamma(\beta) \right) + \\ & \sum_{t} \left( \left(\sum_{w} \log\Gamma(N_{wt} + \beta)\right) - \log\Gamma\left( W * \beta + \sum_{w} N_{wt} \right) \right) \\ & = T * \left( \log\Gamma(W * \beta) - W * \log\Gamma(\beta) \right) - \sum_{t} \log\Gamma\left( W * \beta + N_{t} \right) + \\ & \sum_{w} \sum_{t} \log\Gamma(N_{wt} + \beta) \\ \\ \mathcal{L}(z) & = D * \left(\log\Gamma(T * \alpha) - T * \log\Gamma(\alpha) \right) + \\ & \sum_{d} \left( \left(\sum_{t}\log\Gamma(N_{td} + \alpha)\right) - \log\Gamma\left( T * \alpha + \sum_{t} N_{td} \right) \right) \\ \\ \mathcal{L}(w,z) & = \mathcal{L}(w | z) + \mathcal{L}(z) * */ class likelihood_aggregator : public graphlab::IS_POD_TYPE { typedef graph_type::vertex_type vertex_type; double lik_words_given_topics; double lik_topics; public: likelihood_aggregator() : lik_words_given_topics(0), lik_topics(0) 
{ } likelihood_aggregator& operator+=(const likelihood_aggregator& other) { lik_words_given_topics += other.lik_words_given_topics; lik_topics += other.lik_topics; return *this; } // end of operator += static likelihood_aggregator map(icontext_type& context, const vertex_type& vertex) { // using boost::math::lgamma; const factor_type& factor = vertex.data().factor; ASSERT_EQ(factor.size(), NTOPICS); likelihood_aggregator ret; if(is_word(vertex)) { for(size_t t = 0; t < NTOPICS; ++t) { const count_type value = std::max(count_type(factor[t]), count_type(0)); //ret.lik_words_given_topics += lgamma(value + BETA); ret.lik_words_given_topics += BETA_LGAMMA(value); } } else { ASSERT_TRUE(is_doc(vertex)); double ntokens_in_doc = 0; for(size_t t = 0; t < NTOPICS; ++t) { const count_type value = std::max(count_type(factor[t]), count_type(0)); //ret.lik_topics += lgamma(value + ALPHA); ret.lik_topics += ALPHA_LGAMMA(value); ntokens_in_doc += value; } ret.lik_topics -= lgamma(ntokens_in_doc + NTOPICS * ALPHA); } return ret; } // end of map function static void finalize(icontext_type& context, const likelihood_aggregator& total) { using boost::math::lgamma; // Address the global sum terms double denominator = 0; for(size_t t = 0; t < NTOPICS; ++t) { const count_type value = std::max(count_type(GLOBAL_TOPIC_COUNT[t]), count_type(0)); denominator += lgamma(value + NWORDS * BETA); } // end of for loop const double lik_words_given_topics = NTOPICS * (lgamma(NWORDS * BETA) - NWORDS * lgamma(BETA)) - denominator + total.lik_words_given_topics; const double lik_topics = NDOCS * (lgamma(NTOPICS * ALPHA) - NTOPICS * lgamma(ALPHA)) + total.lik_topics; const double lik = lik_words_given_topics + lik_topics; context.cout() << "Likelihood: " << lik << std::endl; } // end of finalize }; // end of likelihood_aggregator struct /** * \brief The selective signal functions are used to signal only the * vertices corresponding to words or documents. 
This is done by
 * using the iengine::map_reduce_vertices function.
 */
struct signal_only {
  /**
   * \brief Signal only the document vertices and skip the word
   * vertices.
   */
  static graphlab::empty
  docs(icontext_type& context, const graph_type::vertex_type& vertex) {
    if(is_doc(vertex)) context.signal(vertex);
    return graphlab::empty();
  } // end of signal_docs

  /**
   * \brief Signal only the word vertices and skip the document
   * vertices.
   */
  static graphlab::empty
  words(icontext_type& context, const graph_type::vertex_type& vertex) {
    if(is_word(vertex)) context.signal(vertex);
    return graphlab::empty();
  } // end of signal_words
}; // end of selective_only

/**
 * \brief This function is used to load and then initialize the data
 * graph (corpus) from a folder or file.
 *
 * The graph can be in either json form constructed using the graph
 * builder tools or in raw text form.  The raw text format contains a
 * token on each line of each file in the format:
 *
 * \verbatim
 *    <docid> <wordid> <count>
 * \endverbatim
 *
 * (NOTE(review): the verbatim example was lost in extraction; the
 * field order above is reconstructed from graph_loader's grammar.)
 *
 * for example:
 *
 * \verbatim
 *    0    0    2
 *    0    4    1
 *    0    2    3
 * \endverbatim
 *
 * implies that document zero contains word zero twice, word 4 once,
 * and word two three times.
 *
 * If a dictionary is used it is important that each word id
 * correspond to the index in the dictionary file (starting at zero).
 *
 * Once loaded the total number of words, documents, and tokens is
 * counted and saved to global variables which are read during the
 * execution of the sampler.
 *
 * \param [in] dc The distributed control object used to coordinate
 * between machines.
 *
 * \param [in,out] graph The graph object that is initialized.
 *
 * \param [in] corpus_dir The directory or file containing the graph
 * data.  The corpus directory can reside on hdfs in which case the
 * path should begin with "hdfs://namenode".  In addition the file(s)
 * may be gzipped and therefore must end in ".gz".
 *
 * \param [in] load_json Whether the graph data is in text format or
 * preprocessed json format using the graph builder tools.
*/ bool load_and_initialize_graph(graphlab::distributed_control& dc, graph_type& graph, const std::string& corpus_dir, const std::string& format ) { dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; timer.start(); if(format=="matrix"){ dc.cout() << "matrix format" << std::endl; graph.load(corpus_dir, graph_loader); // } else if(format=="json"){ // dc.cout() << "json format" << std::endl; // graph.load_json(corpus_dir, false, eparser, vparser); // } else if(format=="json-gzip"){ // dc.cout() <<"json gzip format" << std::endl; // graph.load_json(corpus_dir, true, eparser, vparser); }else{ dc.cout() << "Non supported format. See --help" << std::endl; return false; } dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. Finished in " << timer.current_time() << " seconds." << std::endl; dc.cout() << "Computing number of words and documents." << std::endl; NWORDS = graph.map_reduce_vertices(is_word); NDOCS = graph.map_reduce_vertices(is_doc); NTOKENS = graph.map_reduce_edges(count_tokens); dc.cout() << "Number of words: " << NWORDS << std::endl; dc.cout() << "Number of docs: " << NDOCS << std::endl; dc.cout() << "Number of tokens: " << NTOKENS << std::endl; ASSERT_GT(NWORDS, 0); ASSERT_GT(NDOCS, 0); ASSERT_GT(NTOKENS, 0); // Prepare the json struct with the word counts TOP_WORDS.lock.lock(); TOP_WORDS.json_string = "{\n" + TOP_WORDS.json_header_string() + "\t\"values\": [] \n }"; TOP_WORDS.lock.unlock(); return true; } // end of load and initialize graph /** * \brief Load the dictionary global variable from the file containing * the terms (one term per line). * * Note that while graphs can be loaded from multiple files the * dictionary must be in a single file. The dictionary is loaded * entirely into memory and used to display word clouds and the top * terms in each topic. * * \param [in] fname the file containing the dictionary data. 
The * data can be located on HDFS and can also be gzipped (must end in * ".gz"). * */ bool load_dictionary(const std::string& fname) { // std::cout << "staring load on: " // << graphlab::get_local_ip_as_str() << std::endl; const bool gzip = boost::ends_with(fname, ".gz"); // test to see if the graph_dir is an hadoop path if(boost::starts_with(fname, "hdfs://")) { graphlab::hdfs hdfs; graphlab::hdfs::fstream in_file(hdfs, fname); boost::iostreams::filtering_stream fin; fin.set_auto_close(false); if(gzip) fin.push(boost::iostreams::gzip_decompressor()); fin.push(in_file); if(!fin.good()) { logstream(LOG_ERROR) << "Error loading dictionary: " << fname << std::endl; return false; } std::string term; while(std::getline(fin,term).good()) DICTIONARY.push_back(term); if (gzip) fin.pop(); fin.pop(); in_file.close(); } else { std::cout << "opening: " << fname << std::endl; std::ifstream in_file(fname.c_str(), std::ios_base::in | std::ios_base::binary); boost::iostreams::filtering_stream fin; if (gzip) fin.push(boost::iostreams::gzip_decompressor()); fin.push(in_file); if(!fin.good() || !fin.good()) { logstream(LOG_ERROR) << "Error loading dictionary: " << fname << std::endl; return false; } std::string term; std::cout << "Loooping" << std::endl; while(std::getline(fin, term).good()) DICTIONARY.push_back(term); if (gzip) fin.pop(); fin.pop(); in_file.close(); } // end of else // std::cout << "Finished load on: " // << graphlab::get_local_ip_as_str() << std::endl; std::cout << "Dictionary Size: " << DICTIONARY.size() << std::endl; return true; } // end of load dictionary struct count_saver { bool save_words; count_saver(bool save_words) : save_words(save_words) { } typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { // Skip saving vertex data if the vertex type is not consistent // with the save type if((save_words && is_doc(vertex)) || (!save_words && is_word(vertex))) return ""; // 
Proceed to save std::stringstream strm; if(save_words) { const graphlab::vertex_id_type vid = vertex.id(); strm << vid << '\t'; } else { // save documents const graphlab::vertex_id_type vid = (-vertex.id()) - 2; strm << vid << '\t'; } const factor_type& factor = vertex.data().factor; for(size_t i = 0; i < factor.size(); ++i) { strm << factor[i]; if(i+1 < factor.size()) strm << '\t'; } strm << '\n'; return strm.str(); } std::string save_edge(const edge_type& edge) const { return ""; //nop } }; // end of prediction_saver /** * \brief The omni engine type is used to allow switching between * synchronous and asynchronous computation. */ typedef graphlab::omni_engine engine_type; int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; // INITIALIZE_EVENT_LOG(dc); ADD_CUMULATIVE_EVENT(TOKEN_CHANGES, "Token Changes", "Changes"); // Parse command line options ----------------------------------------------- const std::string description = "\n=========================================================================\n" "The Collapsed Gibbs Sampler for the LDA model implements\n" "a highly asynchronous version of parallel LDA in which document\n" "and word counts are maintained in an eventually consistent\n" "manner.\n" "\n" "The standard usage is: \n" "\t./cgs_lda --dictionary dictionary.txt --corpus doc_word_count.tsv\n" "where dictionary.txt contains: \n" "\taaa \n\taaai \n\tabalone \n\t ... 
\n" "each line number corresponds to wordid (i.e aaa has wordid=0)\n\n" "and doc_word_count.tsv is formatted :\n" "(where wordid is indexed starting from zero and docid are positive integers)\n" "\t0\t0\t3\n" "\t0\t5\t1\n" "\t ...\n\n" "For JSON format, make sure docid are negative integers index starting from -2 \n\n" "To learn more about the NLP package and its applications visit\n\n" "\t\t http://graphlab.org \n\n" "Additional Options"; graphlab::command_line_options clopts(description); std::string corpus_dir; std::string dictionary_fname; std::string doc_dir; std::string word_dir; std::string exec_type = "asynchronous"; std::string format = "matrix"; clopts.attach_option("dictionary", dictionary_fname, "The file containing the list of unique words"); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); clopts.attach_option("corpus", corpus_dir, "The directory or file containing the corpus data."); clopts.add_positional("corpus"); clopts.attach_option("ntopics", NTOPICS, "Number of topics to use."); clopts.attach_option("alpha", ALPHA, "The document hyper-prior"); clopts.attach_option("beta", BETA, "The word hyper-prior"); clopts.attach_option("topk", TOPK, "The number of words to report"); clopts.attach_option("interval", INTERVAL, "statistics reporting interval (in seconds)"); clopts.attach_option("lik_interval", LIK_INTERVAL, "likelihood reporting interval (in seconds)"); clopts.attach_option("max_count", MAX_COUNT, "The maximum number of occurences of a word in a document."); clopts.attach_option("format", format, "Formats: matrix,json,json-gzip"); clopts.attach_option("burnin", BURNIN, "The time in second to run until a sample is collected. 
" "If less than zero the sampler runs indefinitely."); clopts.attach_option("doc_dir", doc_dir, "The output directory to save the final document counts."); clopts.attach_option("word_dir", word_dir, "The output directory to save the final words counts."); if(!clopts.parse(argc, argv)) { graphlab::mpi_tools::finalize(); return clopts.is_set("help")? EXIT_SUCCESS : EXIT_FAILURE; } if(dictionary_fname.empty()) { logstream(LOG_WARNING) << "No dictionary file was provided." << std::endl << "Top k words will not be estimated." << std::endl; } if(corpus_dir.empty()) { logstream(LOG_ERROR) << "No corpus file was provided." << std::endl; return EXIT_FAILURE; } // Start the webserver graphlab::launch_metric_server(); graphlab::add_metric_server_callback("wordclouds", word_cloud_callback); ///! Initialize global variables GLOBAL_TOPIC_COUNT.resize(NTOPICS); if(!dictionary_fname.empty()) { const bool success = load_dictionary(dictionary_fname); if(!success) { logstream(LOG_ERROR) << "Error loading dictionary." << std::endl; return EXIT_FAILURE; } } if(ALPHA <= 0) { logstream(LOG_ERROR) << "Alpha must be positive (alpha=" << ALPHA << ")!" << std::endl; return EXIT_FAILURE; } if(BETA <= 0) { logstream(LOG_ERROR) << "Beta must be positive (beta=" << BETA << ")!" << std::endl; return EXIT_FAILURE; } /// Initialize the log_gamma precached calculations. ALPHA_LGAMMA.init(ALPHA, 100000); BETA_LGAMMA.init(BETA, 1000000); ///! load the graph graph_type graph(dc, clopts); { const bool success = load_and_initialize_graph(dc, graph, corpus_dir, format); if(!success) { logstream(LOG_ERROR) << "Error loading graph." << std::endl; return EXIT_FAILURE; } } const size_t ntokens = graph.map_reduce_edges(count_tokens); dc.cout() << "Total tokens: " << ntokens << std::endl; engine_type engine(dc, graph, exec_type, clopts); ///! 
Add an aggregator if(!DICTIONARY.empty()) { const bool success = engine.add_vertex_aggregator ("topk", topk_aggregator::map, topk_aggregator::finalize) && engine.aggregate_periodic("topk", INTERVAL); ASSERT_TRUE(success); } { // Add the Global counts aggregator const bool success = engine.add_vertex_aggregator ("global_counts", global_counts_aggregator::map, global_counts_aggregator::finalize) && engine.aggregate_periodic("global_counts", 5); ASSERT_TRUE(success); } { // Add the likelihood aggregator const bool success = engine.add_vertex_aggregator ("likelihood", likelihood_aggregator::map, likelihood_aggregator::finalize) && engine.aggregate_periodic("likelihood", LIK_INTERVAL); ASSERT_TRUE(success); } ///! schedule only documents dc.cout() << "Running The Collapsed Gibbs Sampler" << std::endl; engine.map_reduce_vertices(signal_only::docs); graphlab::timer timer; // Enable sampling cgs_lda_vertex_program::DISABLE_SAMPLING = false; // Run the engine engine.start(); // Finalize the counts cgs_lda_vertex_program::DISABLE_SAMPLING = true; engine.signal_all(); engine.start(); const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime << std::endl << "Updates executed: " << engine.num_updates() << std::endl << "Update Rate (updates/second): " << engine.num_updates() / runtime << std::endl; if(!word_dir.empty()) { // save word topic counts const bool gzip_output = false; const bool save_vertices = true; const bool save_edges = false; const size_t threads_per_machine = 2; const bool save_words = true; graph.save(word_dir, count_saver(save_words), gzip_output, save_vertices, save_edges, threads_per_machine); } if(!doc_dir.empty()) { // save doc topic counts const bool gzip_output = false; const bool save_vertices = true; const bool save_edges = false; const size_t threads_per_machine = 2; const bool save_words = false; graph.save(doc_dir, 
count_saver(save_words), gzip_output, save_vertices, save_edges, threads_per_machine); } graphlab::stop_metric_server_on_eof(); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/topic_modeling/cgs_lda_mimno_experimental.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * */ /** * \file cgs_lda.cpp * * \brief This file contains a GraphLab based implementation of the * Collapsed Gibbs Sampler (CGS) for the Latent Dirichlet Allocation * (LDA) model. * * * * \author Joseph Gonzalez, Diana Hu */ #include #include #include #include #include #include #include #include #include #include #include #include // Global Types // ============================================================================ typedef int count_type; /** * \brief The factor type is used to store the counts of tokens in * each topic for words, documents, and assignments. * * Atomic counts are used because we violate the abstraction by * modifying adjacent vertex data on scatter. As a consequence * multiple threads on the same machine may try to update the same * vertex data at the same time. The graphlab::atomic type ensures * that multiple increments are serially consistent. 
*/ typedef std::vector factor_type; /** * \brief We use the factor type in accumulators and so we define an * operator+= */ inline factor_type& operator+=(factor_type& lvalue, const factor_type& rvalue) { if(!rvalue.empty()) { if(lvalue.empty()) lvalue = rvalue; else { for(size_t t = 0; t < lvalue.size(); ++t) lvalue[t] += rvalue[t]; } } return lvalue; } // end of operator += // We include the rest of GraphLab after we define the operator+= for // vector. #include #include /** * \brief The latent topic id of a token is the smallest reasonable * type. */ typedef uint16_t topic_id_type; // We require a null topic to represent the topic assignment for // tokens that have not yet been assigned. #define NULL_TOPIC (topic_id_type(-1)) /** * \brief The assignment type is used on each edge to store the * assignments of each token. There can be several occurrences of the * same word in a given document and so a vector is used to store the * assignments of each occurrence. */ typedef std::vector< topic_id_type > assignment_type; // Global Variables // ============================================================================ /** * \brief The alpha parameter determines the sparsity of topics for * each document. */ float ALPHA = 1; /** * \brief the Beta parameter determines the sparsity of words in each * document. */ float BETA = 0.1; /** * \brief the total number of topics to uses */ size_t NTOPICS = 50; /** * \brief The total number of words in the dataset. */ size_t NWORDS = 0; /** * \brief The total number of docs in the dataset. */ size_t NDOCS = 0; /** * \brief The total number of tokens in the corpus */ size_t NTOKENS = 0; /** * \brief The number of top words to display during execution (from * each topic). */ size_t TOPK = 5; /** * \brief The interval to display topics during execution. */ size_t INTERVAL = 10; /** * \brief The global variable storing the global topic count across * all machines. This is maintained periodically using aggregation. 
*/ factor_type GLOBAL_TOPIC_COUNT; /** * \brief A dictionary of words used to print the top words during * execution. */ std::vector DICTIONARY; /** * \brief The maximum occurences allowed for an individual term-doc * pair. (edge data) */ size_t MAX_COUNT = 100; /** * \brief The time to run until the first sample is taken. If less * than zero then the sampler will run indefinitely. */ float BURNIN = -1; float MIMNO_S; /** * \brief The json top word struct contains the current set of top * words for each topic encoded in the form of a json string. */ struct top_words_type { graphlab::mutex lock; std::string json_string; top_words_type() : json_string("{\n" + json_header_string() + "\tvalues: [] \n }") { } inline std::string json_header_string() const { return "\t\"ntopics\": " + graphlab::tostr(NTOPICS) + ",\n" + "\t\"nwords\": " + graphlab::tostr(NWORDS) + ",\n" + "\t\"ndocs\": " + graphlab::tostr(NDOCS) + ",\n" + "\t\"ntokens\": " + graphlab::tostr(NTOKENS) + ",\n" + "\t\"alpha\": " + graphlab::tostr(ALPHA) + ",\n" + "\t\"beta\": " + graphlab::tostr(BETA) + ",\n"; } // end of json header string } TOP_WORDS; /** * \brief This method is called by the web interface to construct and * return the word clouds. */ std::pair word_cloud_callback(std::map& varmap) { TOP_WORDS.lock.lock(); const std::pair pair("text/html",TOP_WORDS.json_string); TOP_WORDS.lock.unlock(); return pair; } /** * \brief Create a token changes event tracker which is reported in * the GraphLab metrics dashboard. */ DECLARE_EVENT(TOKEN_CHANGES); // Graph Types // ============================================================================ /** * \brief The vertex data represents each term and document in the * corpus and contains the counts of tokens in each topic. */ struct vertex_data { ///! The total number of updates uint32_t nupdates; ///! The total number of changes to adjacent tokens uint32_t nchanges; ///! 
The count of tokens in each topic factor_type factor; float MIMNO_R; vertex_data() : nupdates(0), nchanges(0), factor(NTOPICS),MIMNO_R(0) { } void save(graphlab::oarchive& arc) const { arc << nupdates << nchanges << MIMNO_R; uint16_t ni = 0; for (size_t i = 0;i < factor.size(); ++i) { ni += (factor[i] > 0); } arc << ni; for (size_t i = 0;i < factor.size(); ++i) { if (factor[i] > 0) { arc << uint16_t(i) << factor[i]; } } } void load(graphlab::iarchive& arc) { arc >> nupdates >> nchanges >> MIMNO_R; for (size_t i = 0;i < factor.size(); ++i) factor[i] = 0; uint16_t ni; arc >> ni; for (uint16_t i = 0;i < ni; ++i) { uint16_t u; arc >> u; arc >> factor[u]; } } }; // end of vertex_data /** * \brief The edge data represents the individual tokens (word,doc) * pairs and their assignment to topics. */ struct edge_data { ///! The number of changes on the last update uint16_t nchanges; ///! The assignment of all tokens assignment_type assignment; edge_data(size_t ntokens = 0) : nchanges(0), assignment(ntokens, NULL_TOPIC) { } void save(graphlab::oarchive& arc) const { arc << nchanges << assignment; } void load(graphlab::iarchive& arc) { arc >> nchanges >> assignment; } }; // end of edge_data /** * \brief The LDA graph is a bipartite graph with docs connected to * terms if the term occurs in the document. * * The edges store the number of occurrences of the term in the * document as a vector of the assignments of that term in that * document to topics. * * The vertices store the total topic counts. 
*/ typedef graphlab::distributed_graph graph_type; /** * \brief Edge data parser used in graph.load_json * * Make sure that the edge file list * has docids from -2 to -(total #docid) and wordids 0 to (total #words -1) */ bool eparser(edge_data& ed, const std::string& line){ const int BASE = 10; char* next_char_ptr = NULL; size_t count = strtoul(line.c_str(), &next_char_ptr, BASE); if(next_char_ptr ==NULL) return false; //threshold count count = std::min(count, MAX_COUNT); ed = (edge_data(count)); return true; } /** * \brief Vertex data parser used in graph.load_json */ bool vparser(vertex_data& vd, const std::string& line){ vd = vertex_data(); return true; } /** * \brief The graph loader is used by graph.load to parse lines of the * text data file. * * The global variable MAX_COUNT limits the number of tokens that can * be constructed on a particular edge. * * We use the relativley fast boost::spirit parser to parse each line. */ bool graph_loader(graph_type& graph, const std::string& fname, const std::string& line) { ASSERT_FALSE(line.empty()); namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; namespace phoenix = boost::phoenix; graphlab::vertex_id_type doc_id(-1), word_id(-1); size_t count = 0; const bool success = qi::phrase_parse (line.begin(), line.end(), // Begin grammar ( qi::ulong_[phoenix::ref(doc_id) = qi::_1] >> -qi::char_(',') >> qi::ulong_[phoenix::ref(word_id) = qi::_1] >> -qi::char_(',') >> qi::ulong_[phoenix::ref(count) = qi::_1] ) , // End grammar ascii::space); if(!success) return false; // Threshold the count count = std::min(count, MAX_COUNT); // since this is a bipartite graph I need a method to number the // left and right vertices differently. To accomplish I make sure // all vertices have non-zero ids and then negate the right vertex. // Unfortunatley graphlab reserves -1 and so we add 2 and negate. 
doc_id += 2; ASSERT_GT(doc_id, 1); doc_id = -doc_id; ASSERT_NE(doc_id, word_id); // Create an edge and add it to the graph graph.add_edge(doc_id, word_id, edge_data(count)); return true; // successful load }; // end of graph loader /** * \brief Determine if the given vertex is a word vertex or a doc * vertex. * * For simplicity we connect docs --> words and therefore if a vertex * has in edges then it is a word. */ inline bool is_word(const graph_type::vertex_type& vertex) { return vertex.num_in_edges() > 0 ? 1 : 0; } /** * \brief Determine if the given vertex is a doc vertex * * For simplicity we connect docs --> words and therefore if a vertex * has out edges then it is a doc */ inline bool is_doc(const graph_type::vertex_type& vertex) { return vertex.num_out_edges() > 0 ? 1 : 0; } /** * \brief return the number of tokens on a particular edge. */ inline size_t count_tokens(const graph_type::edge_type& edge) { return edge.data().assignment.size(); } /** * \brief Get the other vertex in the edge. */ inline graph_type::vertex_type get_other_vertex(const graph_type::edge_type& edge, const graph_type::vertex_type& vertex) { return vertex.id() == edge.source().id()? edge.target() : edge.source(); } // ======================================================== // The Collapsed Gibbs Sampler Function /** * \brief The gather type for the collapsed Gibbs sampler is used to * collect the topic counts on adjacent edges so that the apply * function can compute the correct topic counts for the center * vertex. 
* */ struct gather_type { factor_type factor; uint32_t nchanges; gather_type() : nchanges(0) { }; gather_type(uint32_t nchanges) : factor(NTOPICS), nchanges(nchanges) { }; void save(graphlab::oarchive& arc) const { arc << nchanges; uint16_t ni = 0; for (size_t i = 0;i < factor.size(); ++i) { ni += (factor[i] > 0); } arc << ni; for (size_t i = 0;i < factor.size(); ++i) { if (factor[i] > 0) { arc << uint16_t(i) << factor[i]; } } } void load(graphlab::iarchive& arc) { arc >> nchanges; factor.resize(NTOPICS); for (size_t i = 0;i < factor.size(); ++i) factor[i] = 0; uint16_t ni; arc >> ni; for (uint16_t i = 0;i < ni; ++i) { uint16_t u; arc >> u; arc >> factor[u]; } } gather_type& operator+=(const gather_type& other) { factor += other.factor; nchanges += other.nchanges; return *this; } }; // end of gather type /** * \brief The collapsed Gibbs sampler vertex program updates the topic * counts for the center vertex and then draws new topic assignments * for each edge durring the scatter phase. * */ class cgs_lda_vertex_program : public graphlab::ivertex_program, public graphlab::IS_POD_TYPE { public: /** * \brief At termination we want to disable sampling to allow the * correct final counts to be computed. */ static bool DISABLE_SAMPLING; /** \brief gather on all edges */ edge_dir_type gather_edges(icontext_type& context, const vertex_type& vertex) const { return graphlab::ALL_EDGES; } // end of gather_edges /** * \brief Collect the current topic count on each edge. */ gather_type gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { gather_type ret(edge.data().nchanges); const assignment_type& assignment = edge.data().assignment; foreach(topic_id_type asg, assignment) { if(asg != NULL_TOPIC) ++ret.factor[asg]; } return ret; } // end of gather /** * \brief Update the topic count for the center vertex. This * ensures that the center vertex has the correct topic count before * resampling the topics for each token along each edge. 
*/ void apply(icontext_type& context, vertex_type& vertex, const gather_type& sum) { const size_t num_neighbors = vertex.num_in_edges() + vertex.num_out_edges(); ASSERT_GT(num_neighbors, 0); // There should be no new edge data since the vertex program has been cleared vertex_data& vdata = vertex.data(); ASSERT_EQ(sum.factor.size(), NTOPICS); ASSERT_EQ(vdata.factor.size(), NTOPICS); vdata.nupdates++; vdata.nchanges = sum.nchanges; vdata.factor = sum.factor; if (is_doc(vertex)) { float MIMNO_R = 0.0; for (size_t i = 0;i < vdata.factor.size(); ++i) { MIMNO_R += vdata.factor[i] * BETA / (BETA * NWORDS + GLOBAL_TOPIC_COUNT[i]); } vdata.MIMNO_R = MIMNO_R; } } // end of apply /** * \brief Scatter on all edges if the computation is on-going. * Computation stops after bunrin or when disable sampling is set to * true. */ edge_dir_type scatter_edges(icontext_type& context, const vertex_type& vertex) const { return (DISABLE_SAMPLING || (BURNIN > 0 && context.elapsed_seconds() > BURNIN))? graphlab::NO_EDGES : graphlab::ALL_EDGES; }; // end of scatter edges /** * \brief Draw new topic assignments for each edge token. * * Note that we exploit the GraphLab caching model here by DIRECTLY * modifying the topic counts of adjacent vertices. Making the * changes immediately visible to any adjacent vertex programs * running on the same machine. However, these changes will be * overwritten during the apply step and are only used to accelerate * sampling. This is a potentially dangerous violation of the * abstraction and should be taken with caution. In our case all * vertex topic counts are preallocated and atomic operations are * used. In addition during the sampling phase we must be careful * to guard against potentially negative temporary counts. */ void scatter(icontext_type& context, const vertex_type& vertex, edge_type& edge) const { factor_type& doc_topic_count = is_doc(edge.source()) ? 
edge.source().data().factor : edge.target().data().factor; factor_type& word_topic_count = is_word(edge.source()) ? edge.source().data().factor : edge.target().data().factor; ASSERT_EQ(doc_topic_count.size(), NTOPICS); ASSERT_EQ(word_topic_count.size(), NTOPICS); float MIMNO_R = is_doc(edge.source()) ? edge.source().data().MIMNO_R : edge.target().data().MIMNO_R; float MIMNO_Q = 0.0; std::vector MIMNO_Q_CACHE(NTOPICS); for (size_t t = 0; t < NTOPICS; ++t) { const float n_wt = std::max(count_type(word_topic_count[t]), count_type(0)); if (n_wt > 0) { const float n_dt = std::max(count_type(doc_topic_count[t]), count_type(0)); const float n_t = std::max(count_type(GLOBAL_TOPIC_COUNT[t]), count_type(0)); MIMNO_Q_CACHE[t] = (ALPHA + n_dt)/(BETA * NWORDS + n_t); MIMNO_Q_CACHE[t] = MIMNO_Q_CACHE[t] * n_wt; MIMNO_Q += MIMNO_Q_CACHE[t]; } } // run the actual gibbs sampling std::vector prob(NTOPICS); assignment_type& assignment = edge.data().assignment; edge.data().nchanges = 0; foreach(topic_id_type& asg, assignment) { const topic_id_type old_asg = asg; if(asg != NULL_TOPIC) { // construct the cavity --doc_topic_count[asg]; --word_topic_count[asg]; --GLOBAL_TOPIC_COUNT[asg]; const float n_dt = std::max(count_type(doc_topic_count[asg]), count_type(0)); const float n_t = std::max(count_type(GLOBAL_TOPIC_COUNT[asg]), count_type(0)); const float n_wt = std::max(count_type(word_topic_count[asg]), count_type(0)); MIMNO_Q -= MIMNO_Q_CACHE[asg]; MIMNO_Q_CACHE[asg] = (ALPHA + n_dt)/(BETA * NWORDS + n_t) * n_wt; MIMNO_Q += MIMNO_Q_CACHE[asg]; } asg = 0; ASSERT_GE(MIMNO_S, 0); ASSERT_GE(MIMNO_R, 0); ASSERT_GE(MIMNO_Q, 0); float f = graphlab::random::uniform(0, MIMNO_S + MIMNO_R + MIMNO_Q); if (f < MIMNO_S) { float ctr = 0; for (size_t t = 0; t < NTOPICS; ++t) { ctr += ALPHA * BETA / (BETA * NWORDS + GLOBAL_TOPIC_COUNT[t]); if (ctr >= f) { asg = t; break; } } } else if (f < MIMNO_S + MIMNO_R) { float ctr = 0; f = f - MIMNO_S; for(size_t t = 0; t < NTOPICS; ++t) { if (doc_topic_count[t] > 
0) { ctr += doc_topic_count[t] * BETA / (BETA * NWORDS + GLOBAL_TOPIC_COUNT[t]); if (ctr >= f) { asg = t; break; } } } } else { f = f - MIMNO_S - MIMNO_R; float ctr = 0; for(size_t t = 0; t < NTOPICS; ++t) { if (word_topic_count[t] > 0) { ctr += MIMNO_Q_CACHE[t]; if (ctr >= f) { asg = t; break; } } } } // asg = std::max_element(prob.begin(), prob.end()) - prob.begin(); ++doc_topic_count[asg]; ++word_topic_count[asg]; ++GLOBAL_TOPIC_COUNT[asg]; MIMNO_Q -= MIMNO_Q_CACHE[asg]; { const float n_dt = std::max(count_type(doc_topic_count[asg]), count_type(0)); const float n_t = std::max(count_type(GLOBAL_TOPIC_COUNT[asg]), count_type(0)); const float n_wt = std::max(count_type(word_topic_count[asg]), count_type(0)); MIMNO_Q_CACHE[asg] = (ALPHA + n_dt)/(BETA * NWORDS + n_t) * n_wt; MIMNO_Q += MIMNO_Q_CACHE[asg]; } if(asg != old_asg) { ++edge.data().nchanges; } INCREMENT_EVENT(TOKEN_CHANGES,1); } // End of loop over each token // singla the other vertex context.signal(get_other_vertex(edge, vertex)); } // end of scatter function }; // end of cgs_lda_vertex_program bool cgs_lda_vertex_program::DISABLE_SAMPLING = false; /** * \brief The icontext type associated with the cgs_lda_vertex program * is needed for all aggregators. */ typedef cgs_lda_vertex_program::icontext_type icontext_type; // ======================================================== // Aggregators /** * \brief The topk aggregator is used to periodically compute and * display the topk most common words in each topic. * * The number of words is determined by the global variable \ref TOPK * and the interval is determined by the global variable \ref INTERVAL. 
* */ class topk_aggregator { typedef std::pair cw_pair_type; private: std::vector< std::set > top_words; size_t nchanges, nupdates; public: topk_aggregator(size_t nchanges = 0, size_t nupdates = 0) : nchanges(nchanges), nupdates(nupdates) { } void save(graphlab::oarchive& arc) const { arc << top_words << nchanges; } void load(graphlab::iarchive& arc) { arc >> top_words >> nchanges; } topk_aggregator& operator+=(const topk_aggregator& other) { nchanges += other.nchanges; nupdates += other.nupdates; if(other.top_words.empty()) return *this; if(top_words.empty()) top_words.resize(NTOPICS); for(size_t i = 0; i < top_words.size(); ++i) { // Merge the topk top_words[i].insert(other.top_words[i].begin(), other.top_words[i].end()); // Remove excess elements while(top_words[i].size() > TOPK) top_words[i].erase(top_words[i].begin()); } return *this; } // end of operator += static topk_aggregator map(icontext_type& context, const graph_type::vertex_type& vertex) { topk_aggregator ret_value; const vertex_data& vdata = vertex.data(); ret_value.nchanges = vdata.nchanges; ret_value.nupdates = vdata.nupdates; if(is_word(vertex)) { const graphlab::vertex_id_type wordid = vertex.id(); ret_value.top_words.resize(vdata.factor.size()); for(size_t i = 0; i < vdata.factor.size(); ++i) { const cw_pair_type pair(vdata.factor[i], wordid); ret_value.top_words[i].insert(pair); } } return ret_value; } // end of map function static void finalize(icontext_type& context, const topk_aggregator& total) { if(context.procid() != 0) return; std::string json = "{\n"+ TOP_WORDS.json_header_string() + "\t\"values\": [\n"; for(size_t i = 0; i < total.top_words.size(); ++i) { std::cout << "Topic " << i << ": "; json += "\t[\n"; size_t counter = 0; rev_foreach(cw_pair_type pair, total.top_words[i]) { ASSERT_LT(pair.second, DICTIONARY.size()); json += "\t\t[\"" + DICTIONARY[pair.second] + "\", " + graphlab::tostr(pair.first) + "]"; if(++counter < total.top_words[i].size()) json += ", "; json += '\n'; 
std::cout << DICTIONARY[pair.second] << "(" << pair.first << ")" << ", "; // std::cout << DICTIONARY[pair.second] << ", "; } json += "\t]"; if(i+1 < total.top_words.size()) json += ", "; json += '\n'; std::cout << std::endl; } json += "]}"; // Post the change to the global variable TOP_WORDS.lock.lock(); TOP_WORDS.json_string.swap(json); TOP_WORDS.lock.unlock(); std::cout << "\nNumber of token changes: " << total.nchanges << std::endl; std::cout << "\nNumber of updates: " << total.nupdates << std::endl; } // end of finalize }; // end of topk_aggregator struct /** * \brief The global counts aggregator computes the total number of * tokens in each topic across all words and documents and then * updates the \ref GLOBAL_TOPIC_COUNT variable. * */ struct global_counts_aggregator { typedef graph_type::vertex_type vertex_type; static factor_type map(icontext_type& context, const vertex_type& vertex) { return vertex.data().factor; } // end of map function static void finalize(icontext_type& context, const factor_type& total) { size_t sum = 0; float NEW_MIMNO_S = 0; for(size_t t = 0; t < total.size(); ++t) { GLOBAL_TOPIC_COUNT[t] = std::max(count_type(total[t]/2), count_type(0)); sum += GLOBAL_TOPIC_COUNT[t]; NEW_MIMNO_S += ALPHA * BETA / (BETA * NWORDS + (GLOBAL_TOPIC_COUNT[t] > 0 ? GLOBAL_TOPIC_COUNT[t] : 0)); } MIMNO_S = NEW_MIMNO_S; context.cout() << "Total Tokens: " << sum << std::endl; } // end of finalize }; // end of global_counts_aggregator struct /** * \brief The Likelihood aggregators maintains the current estimate of * the log-likelihood of the current token assignments. * * llik_words_given_topics = ... * ntopics * (gammaln(nwords * beta) - nwords * gammaln(beta)) - ... * sum_t(gammaln( n_t + nwords * beta)) + * sum_w(sum_t(gammaln(n_wt + beta))); * * llik_topics = ... * ndocs * (gammaln(ntopics * alpha) - ntopics * gammaln(alpha)) + ... 
* sum_d(sum_t(gammaln(n_td + alpha)) - gammaln(sum_t(n_td) + ntopics * alpha)); */ class likelihood_aggregator : public graphlab::IS_POD_TYPE { typedef graph_type::vertex_type vertex_type; float lik_words_given_topics; float lik_topics; public: likelihood_aggregator() : lik_words_given_topics(0), lik_topics(0) { } likelihood_aggregator& operator+=(const likelihood_aggregator& other) { lik_words_given_topics += other.lik_words_given_topics; lik_topics += other.lik_topics; return *this; } // end of operator += static likelihood_aggregator map(icontext_type& context, const vertex_type& vertex) { using boost::math::lgamma; const factor_type& factor = vertex.data().factor; ASSERT_EQ(factor.size(), NTOPICS); likelihood_aggregator ret; if(is_word(vertex)) { for(size_t t = 0; t < NTOPICS; ++t) { const float value = std::max(count_type(factor[t]), count_type(0)); ret.lik_words_given_topics += lgamma(value + BETA); } } else { ASSERT_TRUE(is_doc(vertex)); float ntokens_in_doc = 0; for(size_t t = 0; t < NTOPICS; ++t) { const float value = std::max(count_type(factor[t]), count_type(0)); ret.lik_topics += lgamma(value + ALPHA); ntokens_in_doc += factor[t]; } ret.lik_topics -= lgamma(ntokens_in_doc + NTOPICS * ALPHA); } return ret; } // end of map function static void finalize(icontext_type& context, const likelihood_aggregator& total) { using boost::math::lgamma; // Address the global sum terms float denominator = 0; for(size_t t = 0; t < NTOPICS; ++t) { denominator += lgamma(GLOBAL_TOPIC_COUNT[t] + NWORDS * BETA); } // end of for loop const float lik_words_given_topics = NTOPICS * (lgamma(NWORDS * BETA) - NWORDS * lgamma(BETA)) - denominator + total.lik_words_given_topics; const float lik_topics = NDOCS * (lgamma(NTOPICS * ALPHA) - NTOPICS * lgamma(ALPHA)) + total.lik_topics; const float lik = lik_words_given_topics + lik_topics; context.cout() << "Likelihood: " << lik << std::endl; } // end of finalize }; // end of likelihood_aggregator struct /** * \brief The selective signal 
functions are used to signal only the * vertices corresponding to words or documents. This is done by * using the iengine::map_reduce_vertices function. */ struct signal_only { /** * \brief Signal only the document vertices and skip the word * vertices. */ static graphlab::empty docs(icontext_type& context, const graph_type::vertex_type& vertex) { if(is_doc(vertex)) context.signal(vertex); return graphlab::empty(); } // end of signal_docs /** * \brief Signal only the word vertices and skip the document * vertices. */ static graphlab::empty words(icontext_type& context, const graph_type::vertex_type& vertex) { if(is_word(vertex)) context.signal(vertex); return graphlab::empty(); } // end of signal_words }; // end of selective_only /** * \brief This function is used to load and then initialize the data * graph (corpus) from a folder or file. * * The graph can be in either json form constructed using the graph * builder tools or in raw text form. The raw text format contains a * token on each line of each file in the format: * \verbatim ... \endverbatim * * for example: \verbatim 0 0 2 0 4 1 0 2 3 \endverbatim * * implies that document zero contains word zero twice, word 4 once, * and word two three times. * * If a dictionary is used it is important that each word id * correspond to the index in the dictionary file (starting at zero). * * Once loaded the total number of words, documents, and tokens is * counted and saved to global variables which are read during the * execution of the sampler. * * \param [in] dc The distributed control object used to coordinate * between machines. * * \param [in,out] graph The graph object that is initialized. * * \param [in] corpus_dir The directory or file containing the graph * data. The corpus directory can reside on hdfs in which case the * path should begin with "hdfs://namenode". In addition the file(s) * may be gzipped and therefore must end in ".gz". 
* * \param [in] load_json Whether the graph data is in text format or * preprocessed json format using the graph builder tools. */ bool load_and_initialize_graph(graphlab::distributed_control& dc, graph_type& graph, const std::string& corpus_dir, const std::string& format ) { dc.cout() << "Loading graph." << std::endl; graphlab::timer timer; timer.start(); if(format=="matrix"){ dc.cout() << "matrix format" << std::endl; graph.load(corpus_dir, graph_loader); // }else if(format=="json"){ // dc.cout() << "json format" << std::endl; // graph.load_json(corpus_dir, false, eparser, vparser); // }else if(format=="json-gzip"){ // dc.cout() <<"json gzip format" << std::endl; // graph.load_json(corpus_dir, true, eparser, vparser); }else{ dc.cout() << "Non supported format. See --help" << std::endl; return false; } dc.cout() << "Finalizing graph." << std::endl; timer.start(); graph.finalize(); dc.cout() << "Finalizing graph. Finished in " << timer.current_time() << " seconds." << std::endl; dc.cout() << "Computing number of words and documents." << std::endl; NWORDS = graph.map_reduce_vertices(is_word); NDOCS = graph.map_reduce_vertices(is_doc); NTOKENS = graph.map_reduce_edges(count_tokens); dc.cout() << "Number of words: " << NWORDS << std::endl; dc.cout() << "Number of docs: " << NDOCS << std::endl; dc.cout() << "Number of tokens: " << NTOKENS << std::endl; // Prepare the json struct with the word counts TOP_WORDS.lock.lock(); TOP_WORDS.json_string = "{\n" + TOP_WORDS.json_header_string() + "\t\"values\": [] \n }"; TOP_WORDS.lock.unlock(); return true; } // end of load and initialize graph /** * \brief Load the dictionary global variable from the file containing * the terms (one term per line). * * Note that while graphs can be loaded from multiple files the * dictionary must be in a single file. The dictionary is loaded * entirely into memory and used to display word clouds and the top * terms in each topic. * * \param [in] fname the file containing the dictionary data. 
The * data can be located on HDFS and can also be gzipped (must end in * ".gz"). * */ bool load_dictionary(const std::string& fname) { // std::cout << "staring load on: " // << graphlab::get_local_ip_as_str() << std::endl; const bool gzip = boost::ends_with(fname, ".gz"); // test to see if the graph_dir is an hadoop path if(boost::starts_with(fname, "hdfs://")) { graphlab::hdfs hdfs; graphlab::hdfs::fstream in_file(hdfs, fname); boost::iostreams::filtering_stream fin; fin.set_auto_close(false); if(gzip) fin.push(boost::iostreams::gzip_decompressor()); fin.push(in_file); if(!fin.good()) { logstream(LOG_ERROR) << "Error loading dictionary: " << fname << std::endl; return false; } std::string term; while(std::getline(fin,term).good()) DICTIONARY.push_back(term); if (gzip) fin.pop(); fin.pop(); in_file.close(); } else { std::cout << "opening: " << fname << std::endl; std::ifstream in_file(fname.c_str(), std::ios_base::in | std::ios_base::binary); boost::iostreams::filtering_stream fin; if (gzip) fin.push(boost::iostreams::gzip_decompressor()); fin.push(in_file); if(!fin.good() || !fin.good()) { logstream(LOG_ERROR) << "Error loading dictionary: " << fname << std::endl; return false; } std::string term; std::cout << "Loooping" << std::endl; while(std::getline(fin, term).good()) DICTIONARY.push_back(term); if (gzip) fin.pop(); fin.pop(); in_file.close(); } // end of else // std::cout << "Finished load on: " // << graphlab::get_local_ip_as_str() << std::endl; std::cout << "Dictionary Size: " << DICTIONARY.size() << std::endl; return true; } // end of load dictionary struct count_saver { bool save_words; count_saver(bool save_words) : save_words(save_words) { } typedef graph_type::vertex_type vertex_type; typedef graph_type::edge_type edge_type; std::string save_vertex(const vertex_type& vertex) const { // Skip saving vertex data if the vertex type is not consistent // with the save type if((save_words && is_doc(vertex)) || (!save_words && is_word(vertex))) return ""; // 
Proceed to save std::stringstream strm; if(save_words) { const graphlab::vertex_id_type vid = vertex.id(); strm << vid << '\t'; } else { // save documents const graphlab::vertex_id_type vid = (-vertex.id()) - 2; strm << vid << '\t'; } const factor_type& factor = vertex.data().factor; for(size_t i = 0; i < factor.size(); ++i) { strm << factor[i]; if(i+1 < factor.size()) strm << '\t'; } strm << '\n'; return strm.str(); } std::string save_edge(const edge_type& edge) const { return ""; //nop } }; // end of prediction_saver /** * \brief The omni engine type is used to allow switching between * synchronous and asynchronous computation. */ typedef graphlab::omni_engine engine_type; int main(int argc, char** argv) { global_logger().set_log_level(LOG_INFO); global_logger().set_log_to_console(true); ///! Initialize control plain using mpi graphlab::mpi_tools::init(argc, argv); graphlab::distributed_control dc; // INITIALIZE_EVENT_LOG(dc); ADD_CUMULATIVE_EVENT(TOKEN_CHANGES, "Token Changes", "Changes"); // Parse command line options ----------------------------------------------- const std::string description = "\n=========================================================================\n" "The Collapsed Gibbs Sampler for the LDA model implements\n" "a highly asynchronous version of parallel LDA in which document\n" "and word counts are maintained in an eventually consistent\n" "manner.\n" "\n" "The standard usage is: \n" "\t./cgs_lda --dictionary dictionary.txt --corpus doc_word_count.tsv\n" "where dictionary.txt contains: \n" "\taaa \n\taaai \n\tabalone \n\t ... 
\n" "each line number corresponds to wordid (i.e aaa has wordid=0)\n\n" "and doc_word_count.tsv is formatted :\n" "(where wordid is indexed starting from zero and docid are positive integers)\n" "\t0\t0\t3\n" "\t0\t5\t1\n" "\t ...\n\n" "For JSON format, make sure docid are negative integers index starting from -2 \n\n" "To learn more about the NLP package and its applications visit\n\n" "\t\t http://graphlab.org \n\n" "Additional Options"; graphlab::command_line_options clopts(description); std::string corpus_dir; std::string dictionary_fname; std::string doc_dir; std::string word_dir; std::string exec_type = "asynchronous"; std::string format = "matrix"; clopts.attach_option("dictionary", dictionary_fname, "The file containing the list of unique words"); clopts.attach_option("engine", exec_type, "The engine type synchronous or asynchronous"); clopts.attach_option("corpus", corpus_dir, "The directory or file containing the corpus data."); clopts.add_positional("corpus"); clopts.attach_option("ntopics", NTOPICS, "Number of topics to use."); clopts.attach_option("alpha", ALPHA, "The document hyper-prior"); clopts.attach_option("beta", BETA, "The word hyper-prior"); clopts.attach_option("topk", TOPK, "The number of words to report"); clopts.attach_option("interval", INTERVAL, "statistics reporting interval"); clopts.attach_option("max_count", MAX_COUNT, "The maximum number of occurences of a word in a document."); clopts.attach_option("format", format, "Formats: matrix,json,json-gzip"); clopts.attach_option("burnin", BURNIN, "The time in second to run until a sample is collected. " "If less than zero the sampler runs indefinitely."); clopts.attach_option("doc_dir", doc_dir, "The output directory to save the final document counts."); clopts.attach_option("word_dir", word_dir, "The output directory to save the final words counts."); if(!clopts.parse(argc, argv)) { graphlab::mpi_tools::finalize(); return clopts.is_set("help")? 
EXIT_SUCCESS : EXIT_FAILURE; } if(dictionary_fname.empty()) { logstream(LOG_WARNING) << "No dictionary file was provided." << std::endl << "Top k words will not be estimated." << std::endl; } if(corpus_dir.empty()) { logstream(LOG_ERROR) << "No corpus file was provided." << std::endl; return EXIT_FAILURE; } // Start the webserver graphlab::launch_metric_server(); graphlab::add_metric_server_callback("wordclouds", word_cloud_callback); ///! Initialize global variables GLOBAL_TOPIC_COUNT.resize(NTOPICS); if(!dictionary_fname.empty()) { const bool success = load_dictionary(dictionary_fname); if(!success) { logstream(LOG_ERROR) << "Error loading dictionary." << std::endl; return EXIT_FAILURE; } } ///! load the graph graph_type graph(dc, clopts); { const bool success = load_and_initialize_graph(dc, graph, corpus_dir, format); if(!success) { logstream(LOG_ERROR) << "Error loading graph." << std::endl; return EXIT_FAILURE; } } const size_t ntokens = graph.map_reduce_edges(count_tokens); dc.cout() << "Total tokens: " << ntokens << std::endl; engine_type engine(dc, graph, exec_type, clopts); ///! Add an aggregator if(!DICTIONARY.empty()) { const bool success = engine.add_vertex_aggregator ("topk", topk_aggregator::map, topk_aggregator::finalize) && engine.aggregate_periodic("topk", INTERVAL); ASSERT_TRUE(success); } { // Add the Global counts aggregator const bool success = engine.add_vertex_aggregator ("global_counts", global_counts_aggregator::map, global_counts_aggregator::finalize) && engine.aggregate_periodic("global_counts", 5); ASSERT_TRUE(success); } /* { // Add the likelihood aggregator const bool success = engine.add_vertex_aggregator ("likelihood", likelihood_aggregator::map, likelihood_aggregator::finalize) && engine.aggregate_periodic("likelihood", 10); ASSERT_TRUE(success); }*/ engine.aggregate_now("global_counts"); ///! 
schedule only documents dc.cout() << "Running The Collapsed Gibbs Sampler" << std::endl; engine.map_reduce_vertices(signal_only::docs); graphlab::timer timer; // Enable sampling cgs_lda_vertex_program::DISABLE_SAMPLING = false; // Run the engine engine.start(); // Finalize the counts cgs_lda_vertex_program::DISABLE_SAMPLING = true; engine.signal_all(); engine.start(); const double runtime = timer.current_time(); dc.cout() << "----------------------------------------------------------" << std::endl << "Final Runtime (seconds): " << runtime << std::endl << "Updates executed: " << engine.num_updates() << std::endl << "Update Rate (updates/second): " << engine.num_updates() / runtime << std::endl; if(!word_dir.empty()) { // save word topic counts const bool gzip_output = false; const bool save_vertices = true; const bool save_edges = false; const size_t threads_per_machine = 2; const bool save_words = true; graph.save(word_dir, count_saver(save_words), gzip_output, save_vertices, save_edges, threads_per_machine); } if(!doc_dir.empty()) { // save doc topic counts const bool gzip_output = false; const bool save_vertices = true; const bool save_edges = false; const size_t threads_per_machine = 2; const bool save_words = false; graph.save(doc_dir, count_saver(save_words), gzip_output, save_vertices, save_edges, threads_per_machine); } graphlab::stop_metric_server_on_eof(); graphlab::mpi_tools::finalize(); return EXIT_SUCCESS; } // end of main ================================================ FILE: toolkits/topic_modeling/deprecated/cgs_lda.cpp ================================================ /** * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS
* IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied.  See the License for the specific language
* governing permissions and limitations under the License.
*
* For more about this software visit:
*
*   http://www.graphlab.ml.cmu.edu
*
*/

// NOTE(review): every angle-bracketed token in this file appears to have been
// stripped by the extraction that produced this copy (bare "#include"
// directives, empty template argument lists such as "std::pair"/"std::map"
// below).  Restore them from the upstream repository before compiling.
#include #include #include "cgs_lda_common.hpp" #include

/**
 * \brief The gather type used to accumulate information about the
 * words in a document.
 *
 * Maps each neighboring vertex id to a (neighbor factor, edge topic
 * assignment) pair so that apply() can resample every adjacent token
 * from a local snapshot of the counts.
 */
struct gather_type {
  // (neighbor topic-count factor, per-token topic assignment) pair
  // NOTE(review): template arguments stripped by extraction.
  typedef std::pair edge_pair_type;
  // keyed by the neighboring vertex id
  typedef std::map neighborhood_map_type;
  neighborhood_map_type neighborhood_map;

  gather_type() { }

  // Snapshot the neighbor's factor and the connecting edge's assignment.
  gather_type(const graph_type::edge_type& edge,
              const graph_type::vertex_type& vertex) {
    const graph_type::vertex_type other_vertex = get_other_vertex(edge, vertex);
    neighborhood_map[other_vertex.id()] =
      edge_pair_type(other_vertex.data().factor, edge.data().assignment);
  }

  void save(graphlab::oarchive& arc) const { arc << neighborhood_map; }
  void load(graphlab::iarchive& arc) { arc >> neighborhood_map; }

  // Union of the two maps.  map::insert keeps existing entries; the keys
  // are distinct neighbor ids, so no entry is ever overwritten anyway.
  gather_type& operator+=(const gather_type& other) {
    neighborhood_map.insert(other.neighborhood_map.begin(),
                            other.neighborhood_map.end());
    return *this;
  } // end of operator +=
}; // end of gather type

/**
 * Deprecated collapsed-Gibbs-sampler vertex program: gather() snapshots
 * the neighborhood, apply() resamples each adjacent token's topic, and
 * scatter() writes changed assignments back onto the edges.
 */
class cgs_lda_vertex_program : public graphlab::ivertex_program {
private:
  // New per-edge topic assignments computed in apply(), keyed by the
  // neighboring vertex id; consumed in scatter().
  // NOTE(review): template arguments stripped by extraction.
  typedef std::map edge_data_map_type;
  edge_data_map_type new_edge_data;
public:
  void save(graphlab::oarchive& arc) const { arc << new_edge_data; } // end of save cgs_lda
  void load(graphlab::iarchive& arc) { arc >> new_edge_data; } // end of load cgs_lda

  // Gather over the full (in + out) neighborhood of the bipartite graph.
  edge_dir_type gather_edges(icontext_type& context,
                             const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  } // end of gather_edges

  gather_type gather(icontext_type& context, const vertex_type& vertex,
                     edge_type& edge) const {
    return gather_type(edge, vertex);
  } // end of gather

  // Resample the topic of every token adjacent to this vertex using the
  // gathered snapshot, then stash any changed assignments for scatter().
  void apply(icontext_type& context, vertex_type& vertex,
             const gather_type& sum) {
    const size_t num_neighbors = vertex.num_in_edges() + vertex.num_out_edges();
    ASSERT_GT(num_neighbors, 0);
    ASSERT_EQ(new_edge_data.size(), 0);
    vertex_data& vdata = vertex.data();
    factor_type& factor = vdata.factor;
    ASSERT_EQ(factor.size(), NTOPICS);
    // first update the factor count for this vertex
    typedef gather_type::neighborhood_map_type::value_type pair_type;
    foreach(const pair_type& nbr_pair, sum.neighborhood_map) {
      const assignment_type& assignment = nbr_pair.second.second;
      foreach(const topic_id_type& asg, assignment) {
        if(asg != NULL_TOPIC) ++factor[asg];
      } // end of loop over assignments
    } // end of loop over neighborhood
    // Resample the vertex
    vdata.nchanges = 0;
    // run the actual gibbs sampling
    // NOTE(review): element type of prob stripped by extraction.
    std::vector prob(NTOPICS);
    typedef gather_type::neighborhood_map_type::value_type pair_type;
    foreach(const pair_type& nbr_pair, sum.neighborhood_map) {
      const graphlab::vertex_id_type other_id = nbr_pair.first;
      // Work on copies: the true neighbor/edge state is only updated in
      // scatter(); counts here are an eventually-consistent local view.
      factor_type other_factor = nbr_pair.second.first;
      assignment_type assignment = nbr_pair.second.second;
      // One side of the edge is the document, the other the word.
      factor_type& doc_topic_count = is_doc(vertex)? factor : other_factor;
      factor_type& word_topic_count = is_word(vertex)? factor : other_factor;
      ASSERT_EQ(word_topic_count.size(), NTOPICS);
      ASSERT_EQ(doc_topic_count.size(), NTOPICS);
      // Resample the topics
      foreach(topic_id_type& asg, assignment) {
        const topic_id_type old_asg = asg;
        if(asg != NULL_TOPIC) { // construct the cavity
          --doc_topic_count[asg];
          --word_topic_count[asg];
          --GLOBAL_TOPIC_COUNT[asg];
        }
        // Unnormalized collapsed-Gibbs conditional for each topic t.
        for(size_t t = 0; t < NTOPICS; ++t) {
          const double n_dt =
            std::max(count_type(doc_topic_count[t]), count_type(0));
          ASSERT_GE(n_dt, 0);
          const double n_wt =
            std::max(count_type(word_topic_count[t]), count_type(0));
          ASSERT_GE(n_wt, 0);
          const double n_t =
            std::max(count_type(GLOBAL_TOPIC_COUNT[t]), count_type(0));
          ASSERT_GE(n_t, 0);
          prob[t] = (ALPHA + n_dt) * (BETA + n_wt) / (BETA * NWORDS + n_t);
        }
        asg = graphlab::random::multinomial(prob);
        ++doc_topic_count[asg];
        ++word_topic_count[asg];
        ++GLOBAL_TOPIC_COUNT[asg];
        // record a change if one occurs
        if(old_asg != asg) vdata.nchanges++;
      } // End of loop over each token
      // test to see if the topic assignments have change
      // sort the topic assignment to be in a "canonical order"
      std::sort(assignment.begin(), assignment.end());
      const assignment_type& old_assignment = nbr_pair.second.second;
      bool is_same = (old_assignment.size() == assignment.size());
      for(size_t i = 0; i < assignment.size() && is_same; ++i)
        is_same = (assignment[i] == old_assignment[i]);
      if(!is_same) new_edge_data[other_id] = assignment;
    } // end of loop over neighbors
  } // end of apply

  edge_dir_type scatter_edges(icontext_type& context,
                              const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  }; // end of scatter edges

  // Write back any changed assignment for this edge and signal the
  // neighbor so it gets resampled too.
  void scatter(icontext_type& context, const vertex_type& vertex,
               edge_type& edge) const {
    const vertex_type other_vertex = get_other_vertex(edge, vertex);
    edge_data_map_type::const_iterator iter =
      new_edge_data.find(other_vertex.id());
    // If there is an assignment then something changed
    if(iter != new_edge_data.end()) {
      const assignment_type& new_topic_assignment = iter->second;
      ASSERT_EQ(new_topic_assignment.size(), edge.data().assignment.size());
      edge.data().assignment = new_topic_assignment;
    }
    context.signal(get_other_vertex(edge, vertex));
  } // end of scatter function
}; // end of cgs_lda_vertex_program

// NOTE(review): template arguments stripped from these typedefs by extraction.
typedef graphlab::omni_engine engine_type;
typedef cgs_lda_vertex_program::icontext_type icontext_type;
typedef topk_aggregator topk_type;
typedef selective_signal signal_only;
typedef global_counts_aggregator global_counts_agg;

/**
 * Entry point (deprecated toolkit): parses options, loads the dictionary
 * and the doc-word count graph, attaches the top-k aggregator, signals
 * all document vertices, and runs the engine once.
 */
int main(int argc, char** argv) {
  global_logger().set_log_level(LOG_INFO);
  global_logger().set_log_to_console(true);
  ///! Initialize control plain using mpi
  graphlab::mpi_tools::init(argc, argv);
  graphlab::distributed_control dc;
  // Parse command line options -----------------------------------------------
  const std::string description =
    "\n=========================================================================\n"
    "The fast Collapsed Gibbs Sampler for the LDA model implements\n"
    "a highly asynchronous version of parallel LDA in which document\n"
    "and word counts are maintained in an eventually consistent\n"
    "manner.\n"
    "\n"
    "The standard usage is: \n"
    "\t./fast_cgs_lda --dictionary dictionary.txt --matrix doc_word_count.tsv\n"
    "where dictionary.txt contains: \n"
    "\taaa \n\taaai \n\tabalone \n\t ... \n"
    "each line number corresponds to wordid (i.e aaa has wordid=0)\n\n"
    "and doc_word_count.tsv is formatted :\n"
    "(where wordid is indexed starting from zero and docid are positive integers)\n"
    "\t0\t0\t3\n"
    "\t0\t5\t1\n"
    "\t ...\n\n"
    "For JSON format, make sure docid are negative integers index starting from -2 \n\n"
    "To learn more about the NLP package and its applications visit\n\n"
    "\t\t http://graphlab.org \n\n"
    "Additional Options";
  graphlab::command_line_options clopts(description);
  std::string matrix_dir;
  std::string dictionary_fname;
  bool loadjson = false;
  // Deprecated attach_option(name, pointer, default, help) API.
  clopts.attach_option("dictionary", &dictionary_fname, dictionary_fname,
                       "The file containing the list of unique words");
  clopts.add_positional("dictionary");
  clopts.attach_option("matrix", &matrix_dir, matrix_dir,
                       "The directory containing the matrix file");
  clopts.add_positional("matrix");
  clopts.attach_option("ntopics", &NTOPICS, NTOPICS,
                       "Number of topics to use.");
  clopts.attach_option("alpha", &ALPHA, ALPHA,
                       "The document hyper-prior");
  clopts.attach_option("beta", &BETA, BETA,
                       "The word hyper-prior");
  clopts.attach_option("topk", &TOPK, TOPK,
                       "The number of words to report");
  clopts.attach_option("interval", &INTERVAL, INTERVAL,
                       "statistics reporting interval");
  clopts.attach_option("loadjson", &loadjson, loadjson,
                       "Boolean if in json format (matrix arg is dir or gzip file)");
  if(!clopts.parse(argc, argv)) {
    graphlab::mpi_tools::finalize();
    return clopts.is_set("help")? EXIT_SUCCESS : EXIT_FAILURE;
  }
  if(dictionary_fname.empty()) {
    logstream(LOG_ERROR) << "No dictionary file was provided." << std::endl;
    return EXIT_FAILURE;
  }
  if(matrix_dir.empty()) {
    logstream(LOG_ERROR) << "No matrix file was provided." << std::endl;
    return EXIT_FAILURE;
  }
  ///! Initialize global variables
  GLOBAL_TOPIC_COUNT.resize(NTOPICS);
  bool success = load_dictionary(dictionary_fname);
  if(!success) {
    logstream(LOG_ERROR) << "Error loading dictionary." << std::endl;
    return EXIT_FAILURE;
  }
  ///! load the graph
  graph_type graph(dc, clopts);
  success = load_and_initialize_graph(dc, graph, matrix_dir, loadjson);
  if(!success) {
    logstream(LOG_ERROR) << "Error loading graph." << std::endl;
    return EXIT_FAILURE;
  }
  engine_type engine(dc, graph, clopts, "synchronous");
  ///! Add an aggregator
  success =
    engine.add_vertex_aggregator ("topk", topk_type::map, topk_type::finalize) &&
    engine.aggregate_periodic("topk", INTERVAL);
  ASSERT_TRUE(success);
  // Global-counts aggregator disabled in this deprecated version:
  // success =
  //   engine.add_vertex_aggregator
  //   ("global_counts", global_counts_agg::map, global_counts_agg::finalize) &&
  //   engine.aggregate_periodic("global_counts", 5);
  // ASSERT_TRUE(success);
  ///! schedule only documents
  dc.cout() << "Running The Collapsed Gibbs Sampler" << std::endl;
  engine.map_reduce_vertices(signal_only::docs);
  graphlab::timer timer;
  engine.start();
  const double runtime = timer.current_time();
  dc.cout() << "----------------------------------------------------------"
            << std::endl
            << "Final Runtime (seconds): " << runtime
            << std::endl
            << "Updates executed: " << engine.num_updates() << std::endl
            << "Update Rate (updates/second): "
            << engine.num_updates() / runtime << std::endl;
  graphlab::mpi_tools::finalize();
  return EXIT_SUCCESS;
} // end of main

================================================ FILE: toolkits/topic_modeling/deprecated/cvb0_lda_common.cpp ================================================
/**
* Copyright (c) 2009 Carnegie Mellon University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS
* IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied.
See the License for the specific language
* governing permissions and limitations under the License.
*
*
*/

// NOTE(review): system #include targets and template argument lists in this
// file were stripped by the extraction that produced this copy.  Restore
// them from the upstream repository before compiling.
#include #include #include #include "cvb0_lda_common.hpp" #include

// Shared model parameters and global statistics (declared extern in
// cvb0_lda_common.hpp; several are overridable via command-line options).
double ALPHA = 0.1;              // document hyper-prior
double BETA = 0.1;               // word hyper-prior
size_t NTOPICS = 50;             // number of latent topics
size_t NWORDS = 0;               // computed after graph load (count of word vertices)
size_t TOPK = 5;                 // number of words to report per topic
size_t INTERVAL = 10;            // statistics reporting interval
factor_type GLOBAL_TOPIC_COUNT;  // per-topic global counts, resized to NTOPICS in main
// NOTE(review): element type stripped by extraction (presumably std::string).
std::vector DICTIONARY;
size_t MAX_COUNT = 100;          // cap on a single doc/word occurrence count

/**
 * Line parser for the doc-word count matrix.  Each line holds three
 * base-10 integers: docid, wordid, count.  Document ids are shifted by 2
 * and negated so doc and word vertices of the bipartite graph occupy
 * disjoint id ranges.
 */
bool graph_loader(graph_type& graph, const std::string& fname,
                  const std::string& line) {
  ASSERT_FALSE(line.empty());
  const int BASE = 10;
  char* next_char_ptr = NULL;
  graph_type::vertex_id_type doc_id =
    strtoul(line.c_str(), &next_char_ptr, BASE);
  if(next_char_ptr == NULL) return false;
  const graph_type::vertex_id_type word_id =
    strtoul(next_char_ptr, &next_char_ptr, BASE);
  if(next_char_ptr == NULL) return false;
  size_t count = strtoul(next_char_ptr, &next_char_ptr, BASE);
  if(next_char_ptr == NULL) return false;
  // Clamp overly frequent tokens to MAX_COUNT.
  count = std::min(count, MAX_COUNT);
  // since this is a bipartite graph I need a method to number the
  // left and right vertices differently.  To accomplish I make sure
  // all vertices have non-zero ids and then negate the right vertex.
  doc_id += 2;
  ASSERT_GT(doc_id, 1);
  // NOTE(review): negation relies on unsigned wrap-around of
  // vertex_id_type -- confirm against its typedef.
  doc_id = -doc_id;
  ASSERT_NE(doc_id, word_id);
  // Create an edge and add it to the graph
  graph.add_edge(doc_id, word_id, edge_data(count));
  return true; // successful load
}; // end of graph loader

/** populate the global dictionary */
// Reads one term per line from a local or HDFS path, transparently
// gunzipping when the filename ends in ".gz".  Appends to DICTIONARY.
bool load_dictionary(const std::string& fname) {
  const bool gzip = boost::ends_with(fname, ".gz");
  // test to see if the graph_dir is an hadoop path
  if(boost::starts_with(fname, "hdfs://")) {
    graphlab::hdfs hdfs;
    graphlab::hdfs::fstream in_file(hdfs, fname);
    // NOTE(review): filtering_stream mode template argument stripped.
    boost::iostreams::filtering_stream fin;
    fin.set_auto_close(false);
    if(gzip) fin.push(boost::iostreams::gzip_decompressor());
    fin.push(in_file);
    if(!fin.good()) {
      logstream(LOG_ERROR) << "Error loading dictionary: "
                           << fname << std::endl;
      return false;
    }
    std::string term;
    while(std::getline(fin,term).good()) DICTIONARY.push_back(term);
    if (gzip) fin.pop();
    fin.pop();
    in_file.close();
  } else {
    std::cout << "opening: " << fname << std::endl;
    std::ifstream in_file(fname.c_str(),
                          std::ios_base::in | std::ios_base::binary);
    // NOTE(review): filtering_stream mode template argument stripped.
    boost::iostreams::filtering_stream fin;
    if (gzip) fin.push(boost::iostreams::gzip_decompressor());
    fin.push(in_file);
    if(!fin.good()) {
      logstream(LOG_ERROR) << "Error loading dictionary: "
                           << fname << std::endl;
      return false;
    }
    std::string term;
    std::cout << "Loooping" << std::endl;
    while(std::getline(fin, term).good()) DICTIONARY.push_back(term);
    if (gzip) fin.pop();
    fin.pop();
    in_file.close();
  } // end of else
  std::cout << "Dictionary Size: " << DICTIONARY.size() << std::endl;
  return true;
} // end of load dictionary

/**
 * Loads the doc-word matrix with graph_loader, finalizes the distributed
 * graph, sizes each vertex factor to NTOPICS, and counts word vertices
 * into NWORDS.  Always returns true.
 */
bool load_and_initialize_graph(graphlab::distributed_control& dc,
                               graph_type& graph,
                               const std::string& matrix_dir) {
  dc.cout() << "Loading graph." << std::endl;
  graphlab::timer timer; timer.start();
  graph.load(matrix_dir, graph_loader);
  dc.cout() << ": Loading graph. Finished in "
            << timer.current_time() << " seconds." << std::endl;
  dc.cout() << "Finalizing graph." << std::endl;
  timer.start();
  graph.finalize();
  dc.cout() << "Finalizing graph. Finished in "
            << timer.current_time() << " seconds." << std::endl;
  dc.cout() << "Initializing Vertex Data" << std::endl;
  timer.start();
  graph.transform_vertices(initialize_vertex_data);
  dc.cout() << "Finished initializing Vertex Data in "
            << timer.current_time() << " seconds." << std::endl;
  // (sic) typo in the runtime message below is preserved; fixing it would
  // change program output.
  dc.cout() << "Verivying dictionary size." << std::endl;
  NWORDS = graph.map_reduce_vertices(is_word);
  dc.cout() << "Number of words: " << NWORDS;
  //ASSERT_LT(NWORDS, DICTIONARY.size());
  return true;
} // end of load and initialize graph

================================================ FILE: toolkits/topic_modeling/deprecated/cvb0_lda_common.hpp ================================================
/**
* Copyright (c) 2009 Carnegie Mellon University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS
* IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied.  See the License for the specific language
* governing permissions and limitations under the License.
*
*
*/
#ifndef CVB0_LDA_HPP
#define CVB0_LDA_HPP

// NOTE(review): system #include targets and template argument lists were
// stripped from this header by the extraction that produced this copy.
#include #include #include #include

// Basic model types shared by the CVB0 LDA programs.
typedef float count_type;          // fractional topic counts (CVB0 uses soft counts)
typedef uint16_t topic_id_type;    // compact topic index
#define NULL_TOPIC (topic_id_type(-1))
typedef std::vector< count_type > factor_type;       // per-topic counts
typedef std::vector< topic_id_type > assignment_type; // per-token topic ids

// Globals defined in cvb0_lda_common.cpp.
extern double ALPHA;
extern double BETA;
extern size_t NTOPICS;
extern size_t NWORDS;
extern size_t TOPK;
extern size_t INTERVAL;
extern factor_type GLOBAL_TOPIC_COUNT;
// NOTE(review): element type stripped (presumably std::string).
extern std::vector DICTIONARY;
extern size_t MAX_COUNT;

// Element-wise += for factors.  If the left side is unsized (not NTOPICS
// long) it is simply replaced by the right side.
inline factor_type& operator+=(factor_type& lvalue,
                               const factor_type& rvalue) {
  if(!rvalue.empty()) {
    if(lvalue.size() != NTOPICS) lvalue = rvalue;
    else {
      for(size_t t = 0; t < lvalue.size(); ++t) lvalue[t] += rvalue[t];
    }
  }
  return lvalue;
} // end of operator +=

/**
 * The vertex data type: the topic-count factor plus bookkeeping on how
 * often the vertex was updated and how many token assignments changed.
 */
struct vertex_data {
  factor_type factor;   // per-topic counts for this doc or word
  size_t nupdates;      // number of times apply() ran on this vertex
  float nchanges;       // number of changed token assignments last update
  vertex_data() : nupdates(0), nchanges(0) { }
  void save(graphlab::oarchive& arc) const {
    arc << factor << nupdates << nchanges;
  }
  void load(graphlab::iarchive& arc) {
    arc >> factor >> nupdates >> nchanges;
  }
}; // end of vertex_data

/**
 * The edge data type: the token count for this (doc, word) pair and the
 * CVB0 belief vector, sized NTOPICS at construction.
 */
struct edge_data {
  uint32_t count;       // occurrences of the word in the document (clamped)
  factor_type belief;   // soft topic distribution over those occurrences
  edge_data(uint32_t count = 0) : count(count), belief(NTOPICS) { }
  void save(graphlab::oarchive& arc) const { arc << count << belief; }
  void load(graphlab::iarchive& arc) { arc >> count << belief; }
}; // end of edge data

/**
 * \brief The graph type;
 * NOTE(review): template arguments stripped by extraction.
 */
typedef graphlab::distributed_graph graph_type;

bool graph_loader(graph_type& graph, const std::string& fname,
                  const std::string& line);

// Size the vertex factor to NTOPICS (used with transform_vertices).
inline void initialize_vertex_data(graph_type::vertex_type& vertex) {
  vertex.data().factor.resize(NTOPICS);
}

bool load_and_initialize_graph(graphlab::distributed_control& dc,
                               graph_type& graph,
                               const std::string& matrix_dir);

/** populate the global dictionary */
bool load_dictionary(const std::string& fname);

// In the bipartite doc->word graph, words have in-edges and docs have
// out-edges.
inline bool is_word(const graph_type::vertex_type& vertex) {
  return vertex.num_in_edges() > 0 ? 1 : 0;
}
inline bool is_doc(const graph_type::vertex_type& vertex) {
  return vertex.num_out_edges() > 0 ? 1 : 0;
}

// Return the endpoint of edge that is not vertex.
inline graph_type::vertex_type
get_other_vertex(graph_type::edge_type& edge,
                 const graph_type::vertex_type& vertex) {
  return vertex.id() == edge.source().id()? edge.target() : edge.source();
}

/**
 * Periodic aggregator that reports the TOPK highest-count words per
 * topic, plus totals of nchanges/nupdates across vertices.
 * NOTE(review): the template parameter list (an IContext type, judging by
 * the typedef below) and several argument lists were stripped.
 */
template class topk_aggregator {
  typedef IContext icontext_type;
  // (count, wordid) pair; set ordering puts the smallest count first.
  // NOTE(review): template arguments stripped.
  typedef std::pair cw_pair_type;
private:
  std::vector< std::set > top_words;  // one candidate set per topic
  float nchanges, nupdates;
public:
  topk_aggregator(size_t nchanges = 0, size_t nupdates = 0) :
    nchanges(nchanges), nupdates(nupdates) { }
  void save(graphlab::oarchive& arc) const {
    arc << top_words << nchanges << nupdates;
  }
  void load(graphlab::iarchive& arc) {
    arc >> top_words >> nchanges >> nupdates;
  }
  topk_aggregator& operator+=(const topk_aggregator& other) {
    nchanges += other.nchanges;
    nupdates += other.nupdates;
    if(other.top_words.empty()) return *this;
    if(top_words.empty()) top_words.resize(NTOPICS);
    for(size_t i = 0; i < top_words.size(); ++i) {
      // Merge the topk
      top_words[i].insert(other.top_words[i].begin(),
                          other.top_words[i].end());
      // Remove excess elements (drop the smallest-count entries)
      while(top_words[i].size() > TOPK)
        top_words[i].erase(top_words[i].begin());
    }
    return *this;
  } // end of operator +=
  // Word vertices contribute their per-topic counts as (count, id) pairs.
  static topk_aggregator map(icontext_type& context,
                             const graph_type::vertex_type& vertex) {
    topk_aggregator ret_value;
    const vertex_data& vdata = vertex.data();
    ret_value.nchanges = vdata.nchanges;
    ret_value.nupdates = vdata.nupdates;
    if(is_word(vertex)) {
      const graphlab::vertex_id_type wordid = vertex.id();
      ret_value.top_words.resize(vdata.factor.size());
      for(size_t i = 0; i < vdata.factor.size(); ++i) {
        const cw_pair_type pair(vdata.factor[i], wordid);
        ret_value.top_words[i].insert(pair);
      }
    }
    return ret_value;
  } // end of map function
  // Print the top words per topic on machine 0 only.
  static void finalize(icontext_type& context,
                       const topk_aggregator& total) {
    if(context.procid() != 0) return;
    for(size_t i = 0; i < total.top_words.size(); ++i) {
      std::cout << "Topic " << i << ": ";
      rev_foreach(cw_pair_type pair, total.top_words[i]) {
        ASSERT_LT(pair.second, DICTIONARY.size());
        // std::cout << DICTIONARY[pair.second]
        //           << "(" << pair.first << ")" << ", ";
        std::cout << DICTIONARY[pair.second] << ", ";
      }
      std::cout << std::endl;
    }
    std::cout << "\nNumber of token changes: " << total.nchanges << std::endl;
    std::cout << "\nNumber of updates: " << total.nupdates << std::endl;
  } // end of finalize
}; // end of topk_aggregator struct

/**
 * Aggregator that rebuilds GLOBAL_TOPIC_COUNT from the sum of all vertex
 * factors.  The sum is halved -- presumably because each edge belief is
 * counted by both of its endpoints (TODO confirm).
 */
template struct global_counts_aggregator {
  typedef graph_type::vertex_type vertex_type;
  static factor_type map(IContext& context, const vertex_type& vertex) {
    return vertex.data().factor;
  } // end of map function
  static void finalize(IContext& context, const factor_type& total) {
    for(size_t t = 0; t < total.size(); ++t)
      GLOBAL_TOPIC_COUNT[t] = std::max(count_type(total[t]/2),
                                       count_type(0));
  } // end of finalize
}; // end of global_counts_aggregator struct

/**
 * map_reduce_vertices helpers that signal only one side of the bipartite
 * graph (documents or words).
 */
template struct selective_signal {
  static graphlab::empty docs(IContext& context,
                              graph_type::vertex_type& vertex) {
    if(is_doc(vertex)) context.signal(vertex);
    return graphlab::empty();
  } // end of signal_docs
  static graphlab::empty words(IContext& context,
                               graph_type::vertex_type& vertex) {
    if(is_word(vertex)) context.signal(vertex);
    return graphlab::empty();
  } // end of signal_words
}; // end of selective_signal

// NOTE(review): include target stripped by extraction.
#include
#endif

================================================ FILE: toolkits/topic_modeling/deprecated/fast_cvb0_lda.cpp ================================================
/**
* Copyright (c) 2009 Carnegie Mellon University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS
* IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied.  See the License for the specific language
* governing permissions and limitations under the License.
*
*/

// NOTE(review): system #include targets and template argument lists in this
// file were stripped by the extraction that produced this copy.
#include #include #include #include "cvb0_lda_common.hpp" #include

/**
 * CVB0 LDA vertex program.  Most of the work happens in gather(), which
 * directly mutates neighbor vertex data and edge beliefs (deliberately
 * breaking the GAS abstraction, as the original comments flag); apply()
 * then installs the summed belief as the vertex factor, and scatter()
 * re-signals the neighborhood.
 */
class cvb0_lda_vertex_program :
  public graphlab::ivertex_program,
  public graphlab::IS_POD_TYPE {
public:
  // Gather over the full (in + out) neighborhood of the bipartite graph.
  edge_dir_type gather_edges(icontext_type& context,
                             const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  } // end of gather_edges

  // Recompute this edge's belief in place and fold it into the running
  // doc/word/global counts; returns the belief as the gather value.
  factor_type gather(icontext_type& context, const vertex_type& vertex,
                     edge_type& edge) const {
    vertex_type other_vertex = get_other_vertex(edge, vertex);
    // VIOLATING THE ABSTRACTION!
    vertex_data& vdata = graph_type::vertex_type(vertex).data();
    // VIOLATING THE ABSTRACTION!
    vertex_data& other_vdata = other_vertex.data();
    factor_type& doc_topic_count =
      is_doc(vertex) ? vdata.factor : other_vdata.factor;
    factor_type& word_topic_count =
      is_word(vertex) ? vdata.factor : other_vdata.factor;
    ASSERT_EQ(doc_topic_count.size(), NTOPICS);
    ASSERT_EQ(word_topic_count.size(), NTOPICS);
    // run the actual gibbs sampling
    factor_type& belief = edge.data().belief;
    const uint32_t count = edge.data().count;
    // Resample the topics
    double sum = 0, old_sum = 0;
    for(size_t t = 0; t < NTOPICS; ++t) {
      // Subtract the old belief (the CVB0 "cavity") before recomputing.
      old_sum += belief[t];
      doc_topic_count[t] -= belief[t];
      word_topic_count[t] -= belief[t];
      GLOBAL_TOPIC_COUNT[t] -= belief[t];
      const double n_dt =
        std::max(count_type(doc_topic_count[t]), count_type(0));
      ASSERT_GE(n_dt, 0);
      const double n_wt =
        std::max(count_type(word_topic_count[t]), count_type(0));
      ASSERT_GE(n_wt, 0);
      const double n_t =
        std::max(count_type(GLOBAL_TOPIC_COUNT[t]), count_type(0));
      ASSERT_GE(n_t, 0);
      belief[t] = (ALPHA + n_dt) * (BETA + n_wt) / (BETA * NWORDS + n_t);
      sum += belief[t];
    } // End of loop over each token
    ASSERT_GT(sum, 0);
    // First touch of this edge (no prior belief): hard-assign the whole
    // count to one sampled topic.
    if(old_sum == 0) {
      size_t asg = graphlab::random::multinomial(belief);
      for(size_t i = 0; i < NTOPICS; ++i) belief[i] = 0;
      belief[asg] = count;
      return belief;
    }
    // Otherwise distribute the count proportionally and add it back in.
    for(size_t t = 0; t < NTOPICS; ++t) {
      belief[t] = count * (belief[t]/sum);
      doc_topic_count[t] += belief[t];
      word_topic_count[t] += belief[t];
      GLOBAL_TOPIC_COUNT[t] += belief[t];
    }
    return belief;
  } // end of gather

  // Replace the vertex factor by the summed edge beliefs.
  void apply(icontext_type& context, vertex_type& vertex,
             const factor_type& sum) {
    const size_t num_neighbors =
      vertex.num_in_edges() + vertex.num_out_edges();
    ASSERT_GT(num_neighbors, 0);
    // There should be no new edge data since the vertex program has been cleared
    vertex_data& vdata = vertex.data();
    ASSERT_EQ(sum.size(), NTOPICS);
    ASSERT_EQ(vdata.factor.size(), NTOPICS);
    vdata.nupdates++;
    vdata.nchanges = 0;
    vdata.factor = sum;
  } // end of apply

  edge_dir_type scatter_edges(icontext_type& context,
                              const vertex_type& vertex) const {
    return graphlab::ALL_EDGES;
  }; // end of scatter edges

  // Keep the computation running by signaling every neighbor.
  void scatter(icontext_type& context, const vertex_type& vertex,
               edge_type& edge) const {
    const vertex_type other_vertex = get_other_vertex(edge, vertex);
    context.signal(other_vertex);
  } // end of scatter function
}; // end of cvb0_lda_vertex_program

// NOTE(review): template arguments stripped from these typedefs.
typedef graphlab::omni_engine engine_type;
typedef cvb0_lda_vertex_program::icontext_type icontext_type;
typedef topk_aggregator topk_type;
typedef selective_signal signal_only;
typedef global_counts_aggregator global_counts_agg;

/**
 * Entry point: parses options, loads dictionary and graph, attaches the
 * top-k and global-counts aggregators, signals all documents, and runs
 * the asynchronous engine.
 */
int main(int argc, char** argv) {
  global_logger().set_log_level(LOG_INFO);
  global_logger().set_log_to_console(true);
  ///! Initialize control plain using mpi
  graphlab::mpi_tools::init(argc, argv);
  graphlab::distributed_control dc;
  // Parse command line options -----------------------------------------------
  const std::string description =
    "\n=========================================================================\n"
    "The fast Collapsed Variational Bayes Alg for the LDA model implements\n"
    "a highly asynchronous version of parallel CVB0 in which document\n"
    "and word counts are maintained in an eventually consistent\n"
    "manner.\n"
    "\n"
    "The standard usage is: \n"
    "\t./fast_cvb0_lda --dictionary dictionary.txt --matrix doc_word_count.tsv\n"
    "where dictionary.txt contains: \n"
    "\taaa \n\taaai \n\tabalone \n\t ... \n\n"
    "and doc_word_count.tsv is formatted :\n"
    "\t0\t0\t3\n"
    "\t0\t5\t1\n"
    "\t ...\n\n"
    "To learn more about the NLP package and its applications visit\n\n"
    "\t\t http://graphlab.org \n\n"
    "Additional Options";
  graphlab::command_line_options clopts(description);
  std::string matrix_dir;
  std::string dictionary_fname;
  // Deprecated attach_option(name, pointer, default, help) API.
  clopts.attach_option("dictionary", &dictionary_fname, dictionary_fname,
                       "The file containing the list of unique words");
  clopts.add_positional("dictionary");
  clopts.attach_option("matrix", &matrix_dir, matrix_dir,
                       "The directory or file containing the matrix data.");
  clopts.add_positional("matrix");
  clopts.attach_option("ntopics", &NTOPICS, NTOPICS,
                       "Number of topics to use.");
  clopts.attach_option("alpha", &ALPHA, ALPHA,
                       "The document hyper-prior");
  clopts.attach_option("beta", &BETA, BETA,
                       "The word hyper-prior");
  clopts.attach_option("topk", &TOPK, TOPK,
                       "The number of words to report");
  clopts.attach_option("interval", &INTERVAL, INTERVAL,
                       "statistics reporting interval");
  clopts.attach_option("max_count", &MAX_COUNT, MAX_COUNT,
                       "The maximum number of occurences of a word in a document.");
  if(!clopts.parse(argc, argv)) {
    graphlab::mpi_tools::finalize();
    return clopts.is_set("help")? EXIT_SUCCESS : EXIT_FAILURE;
  }
  if(dictionary_fname.empty()) {
    logstream(LOG_ERROR) << "No dictionary file was provided." << std::endl;
    return EXIT_FAILURE;
  }
  if(matrix_dir.empty()) {
    logstream(LOG_ERROR) << "No matrix file was provided." << std::endl;
    return EXIT_FAILURE;
  }
  ///! Initialize global variables
  GLOBAL_TOPIC_COUNT.resize(NTOPICS);
  bool success = load_dictionary(dictionary_fname);
  if(!success) {
    logstream(LOG_ERROR) << "Error loading dictionary." << std::endl;
    return EXIT_FAILURE;
  }
  ///! load the graph
  graph_type graph(dc, clopts);
  success = load_and_initialize_graph(dc, graph, matrix_dir);
  if(!success) {
    logstream(LOG_ERROR) << "Error loading graph." << std::endl;
    return EXIT_FAILURE;
  }
  engine_type engine(dc, graph, clopts, "asynchronous");
  ///! Add an aggregator
  success =
    engine.add_vertex_aggregator ("topk", topk_type::map, topk_type::finalize) &&
    engine.aggregate_periodic("topk", INTERVAL);
  ASSERT_TRUE(success);
  success =
    engine.add_vertex_aggregator ("global_counts", global_counts_agg::map,
                                  global_counts_agg::finalize) &&
    engine.aggregate_periodic("global_counts", 5);
  ASSERT_TRUE(success);
  ///! schedule only documents
  dc.cout() << "Running The Collapsed Gibbs Sampler" << std::endl;
  engine.map_reduce_vertices(signal_only::docs);
  graphlab::timer timer;
  engine.start();
  const double runtime = timer.current_time();
  dc.cout() << "----------------------------------------------------------"
            << std::endl
            << "Final Runtime (seconds): " << runtime
            << std::endl
            << "Updates executed: " << engine.num_updates() << std::endl
            << "Update Rate (updates/second): "
            << engine.num_updates() / runtime << std::endl;
  graphlab::mpi_tools::finalize();
  return EXIT_SUCCESS;
} // end of main

================================================ FILE: toolkits/topic_modeling/http/index.html ================================================
LDA WordCloud Viewer

WordCloud Visualizer

NTopics
NWords
NDocs
NTokens
Alpha
Beta
================================================ FILE: toolkits/topic_modeling/http/lda_visualizer.js ================================================ google.load("jquery", "1.5"); google.load("jqueryui", "1.7.2"); google.load("visualization", "1"); var domain_str = "http://localhost:8090"; var page_str = "/wordclouds"; var update_interval = 2000; function update_domain(form) { domain_str = form.inputbox.value; get_top_words(); } var term_clouds = []; // Start the rendering of the UI google.setOnLoadCallback(function() { get_top_words(); }); function get_top_words() { jQuery.getJSON(domain_str + page_str, process_top_words).error(function() { console.log("Unable to access " + domain_str + " will try again."); }).complete(function() { setTimeout(get_top_words, update_interval); }); } function process_top_words(data) { // Load summary info $("#ntopics").text(data.ntopics); $("#nwords").text(data.nwords); $("#ndocs").text(data.ndocs); $("#ntokens").text(data.ntokens); $("#alpha").text(data.alpha); $("#beta").text(data.beta); // Render all the current values var container = $("#word_clouds"); jQuery.each(data.values, function(i, term_count_table) { if(term_clouds[i] == undefined) { var div_name = "term_cloud_" + i; container.append( "
"); var div = container.children("#" + div_name); var cloud = new TermCloud(div[0]); term_clouds[i] = { div: div, cloud: cloud }; } var labels = [["String", "Value"]]; var table_data = labels.concat(term_count_table); var table = google.visualization.arrayToDataTable(table_data); table.addColumn("string", "URL"); // console.log(table); term_clouds[i].cloud.draw(table, null ); }); // Get the job info again } // end of process top words ================================================ FILE: toolkits/topic_modeling/http/style.css ================================================ .body { text-align: center; } .cloud { display: inline-block; width: 200px; height: 200px; } .setting { display: inline-block; width: 100px; } .label { font-size: 14pt; } ================================================ FILE: toolkits/topic_modeling/http/wordclouds ================================================ { "ntopics": 50, "nwords": 8349328, "ndocs": 2601668, "ntokens": 479126338, "alpha": 0.5, "beta": 0.1, "values": [ [ ["river", 260462], ["water", 154470], ["lake", 137116], ["mountain", 87756], ["area", 80303], ["north", 68834], ["sea", 68785], ["valley", 65516], ["park", 59788], ["south", 52968], ["forest", 51247], ["mountains", 50866], ["creek", 42652], ["rock", 38748], ["ice", 37717], ["range", 36985], ["west", 34535], ["high", 34438], ["dam", 34158], ["located", 33700], ["island", 33345], ["fish", 33186], ["national", 32635], ["bay", 32511], ["trail", 31835], ["ocean", 31464], ["east", 31423], ["winter", 31241], ["rivers", 30637], ["northern", 30126], ["areas", 30011], ["food", 29762], ["beach", 29407], ["wine", 29129], ["flows", 28804], ["southern", 28628], ["mount", 28387], ["fishing", 28031], ["summer", 27191], ["long", 26993], ["region", 26712], ["small", 26610], ["large", 26522], ["coast", 26439], ["lakes", 26206], ["years", 25754], ["natural", 25746], ["point", 25608], ["basin", 24581], ["great", 24348] ], [ ["number", 59354], ["space", 47824], ["set", 47815], ["function", 
46299], ["theory", 46070], ["example", 41449], ["time", 41122], ["1", 39395], ["point", 38591], ["field", 38161], ["system", 35163], ["will", 33158], ["called", 32081], ["group", 30044], ["case", 29285], ["model", 28084], ["form", 27220], ["defined", 26408], ["numbers", 25599], ["problem", 24195], ["0", 23684], ["method", 23448], ["order", 22210], ["points", 21109], ["values", 20471], ["general", 20341], ["functions", 19980], ["equation", 19596], ["light", 19328], ["distance", 19263], ["2", 19161], ["linear", 18590], ["complex", 17839], ["data", 17699], ["mathematical", 17674], ["quantum", 17593], ["theorem", 17420], ["constant", 17157], ["sequence", 16984], ["result", 16901], ["equal", 16793], ["terms", 16791], ["mass", 16653], ["frequency", 16355], ["elements", 16339], ["real", 16260], ["object", 16235], ["measure", 16193], ["surface", 16155], ["distribution", 16118] ], [ ["chinese", 117128], ["china", 108771], ["hong", 49992], ["kong", 48754], ["emperor", 40718], ["swedish", 40242], ["norwegian", 35603], ["sweden", 35590], ["norway", 31796], ["li", 30072], ["singapore", 27479], ["danish", 25759], ["taiwan", 21623], ["denmark", 21541], ["vietnam", 20716], ["province", 18312], ["dynasty", 18155], ["thailand", 17941], ["wang", 17617], ["finland", 17472], ["finnish", 16520], ["beijing", 15304], ["liu", 15226], ["modern", 15080], ["han", 14879], ["vietnamese", 14198], ["shanghai", 14088], ["wu", 13748], ["prince", 13740], ["thai", 13254], ["asia", 12640], ["yang", 12389], ["zhang", 12382], ["government", 12224], ["son", 12155], ["imperial", 12104], ["stockholm", 11894], ["oslo", 11566], ["yuan", 11455], ["hawaii", 11328], ["asian", 11198], ["chen", 11143], ["sun", 11118], ["tang", 10570], ["general", 10416], ["wei", 10070], ["capital", 9970], ["northern", 9943], ["copenhagen", 9879], ["peoples", 9820] ], [ ["station", 218615], ["road", 211136], ["route", 166646], ["railway", 162110], ["bridge", 117385], ["north", 93174], ["highway", 90290], ["service", 89930], 
["east", 73950], ["south", 70309], ["train", 69541], ["trains", 67179], ["west", 66155], ["bus", 63332], ["street", 63221], ["rail", 61975], ["traffic", 61103], ["city", 55517], ["built", 51505], ["opened", 51184], ["services", 50737], ["lines", 47599], ["railroad", 46751], ["main", 43720], ["construction", 42361], ["river", 41844], ["routes", 38736], ["junction", 38702], ["stations", 37598], ["canal", 37036], ["passenger", 36131], ["system", 35540], ["transport", 35523], ["operated", 35131], ["avenue", 33983], ["branch", 33422], ["track", 31377], ["runs", 31158], ["tunnel", 30271], ["central", 30257], ["roads", 30175], ["side", 27881], ["railways", 26943], ["transit", 26668], ["buses", 26533], ["area", 26351], ["passengers", 25829], ["located", 25769], ["southern", 25737], ["crossing", 25642] ], [ ["africa", 72052], ["african", 58981], ["israel", 45272], ["south", 38017], ["islamic", 36077], ["arab", 34470], ["israeli", 32276], ["muslim", 32113], ["al", 31321], ["iran", 30762], ["iraq", 30647], ["government", 29202], ["turkish", 24919], ["international", 24117], ["turkey", 22596], ["egypt", 21825], ["islam", 21774], ["united", 20860], ["ali", 20493], ["afghanistan", 20104], ["country", 20003], ["muslims", 18215], ["iranian", 17513], ["ibn", 17238], ["muhammad", 17186], ["nations", 16723], ["military", 16531], ["british", 16381], ["group", 16381], ["east", 16358], ["palestinian", 16130], ["arabic", 16037], ["lebanon", 15922], ["persian", 15509], ["iraqi", 15374], ["security", 15325], ["armenian", 15050], ["saudi", 15009], ["countries", 14815], ["war", 14277], ["nigeria", 14011], ["abu", 13779], ["egyptian", 13540], ["palestine", 13334], ["republic", 13297], ["jordan", 13088], ["foreign", 12990], ["national", 12911], ["middle", 12839], ["arabia", 12801] ], [ ["god", 93661], ["jewish", 69449], ["jews", 35911], ["religious", 34013], ["temple", 31507], ["jesus", 29290], ["gods", 27329], ["day", 26164], ["religion", 25225], ["rabbi", 24187], ["christian", 22783], 
["life", 22730], ["spiritual", 22025], ["will", 21864], ["tradition", 20805], ["people", 20758], ["text", 20618], ["called", 20473], ["hebrew", 20082], ["bible", 19857], ["man", 18481], ["israel", 17916], ["spirit", 17889], ["divine", 17375], ["book", 17297], ["great", 16997], ["three", 16652], ["faith", 15460], ["death", 15099], ["ancient", 15076], ["buddhist", 14703], ["time", 14341], ["testament", 13416], ["jerusalem", 13349], ["scholars", 13341], ["century", 12900], ["biblical", 12711], ["prayer", 12398], ["practice", 12319], ["word", 12218], ["ritual", 12205], ["written", 12073], ["sacred", 11998], ["worship", 11720], ["judaism", 11636], ["buddhism", 11563], ["earth", 11533], ["son", 11524], ["soul", 11523], ["lord", 11432] ], [ ["will", 94707], ["father", 90397], ["family", 89269], ["mother", 85666], ["man", 84192], ["love", 80756], ["tells", 72378], ["life", 71707], ["house", 69787], ["time", 60776], ["find", 58992], ["woman", 57520], ["friends", 53698], ["story", 51044], ["day", 50993], ["young", 49865], ["friend", 49143], ["help", 48819], ["wife", 48554], ["girl", 47015], ["night", 46902], ["takes", 45610], ["finds", 44474], ["relationship", 43821], ["episode", 41882], ["child", 40538], ["daughter", 38850], ["son", 38504], ["brother", 38075], ["room", 38034], ["money", 37618], ["children", 37584], ["parents", 37200], ["death", 36815], ["leave", 35373], ["police", 35105], ["going", 34058], ["eventually", 33450], ["boy", 33386], ["jack", 32434], ["car", 32434], ["sister", 32160], ["told", 31823], ["asks", 31298], ["husband", 29418], ["left", 29167], ["job", 29145], ["girls", 29071], ["named", 28463], ["michael", 28428] ], [ ["japan", 101530], ["japanese", 101059], ["fight", 34890], ["korea", 34406], ["korean", 30948], ["tokyo", 28879], ["title", 23513], ["anime", 17698], ["manga", 16643], ["round", 16027], ["boxing", 14448], ["series", 12374], ["decision", 11629], ["kim", 11563], ["martial", 10966], ["lost", 8766], ["champion", 8642], ["fighting", 8642], 
["prefecture", 8376], ["defeated", 7824], ["japans", 7758], ["three", 7702], ["second", 7457], ["bout", 7450], ["seoul", 7438], ["fought", 7420], ["time", 7114], ["period", 7095], ["heavyweight", 7078], ["ufc", 6874], ["fights", 6863], ["called", 6344], ["imperial", 6192], ["fighter", 6034], ["south", 6021], ["clan", 5980], ["osaka", 5833], ["belt", 5807], ["main", 5709], ["training", 5633], ["super", 5459], ["samurai", 5436], ["fighters", 5355], ["lee", 5331], ["karate", 5221], ["arts", 5206], ["named", 5186], ["knockout", 5155], ["kyoto", 5121], ["boxer", 5121] ], [ ["company", 285451], ["business", 155572], ["services", 109733], ["companies", 97954], ["market", 92489], ["international", 91872], ["development", 91070], ["management", 86497], ["government", 84136], ["bank", 83545], ["financial", 79530], ["industry", 77810], ["united", 74349], ["public", 69466], ["will", 65405], ["group", 64362], ["economic", 62847], ["service", 60081], ["countries", 59365], ["trade", 56501], ["products", 54904], ["including", 49870], ["private", 49795], ["national", 49680], ["project", 49414], ["tax", 47754], ["corporation", 46149], ["price", 44329], ["money", 43774], ["largest", 42667], ["year", 42205], ["billion", 41753], ["investment", 41661], ["sold", 40125], ["capital", 38712], ["years", 38620], ["global", 38366], ["employees", 38014], ["production", 37815], ["exchange", 37379], ["health", 37288], ["provide", 37122], ["cost", 37070], ["projects", 36951], ["major", 36395], ["sales", 36384], ["established", 36025], ["policy", 35937], ["agency", 35357], ["stock", 35202] ], [ ["air", 233008], ["aircraft", 159233], ["flight", 71583], ["force", 69627], ["airport", 67388], ["flying", 47659], ["wing", 45639], ["base", 40380], ["storm", 37845], ["squadron", 37632], ["space", 37140], ["pilot", 34437], ["aviation", 33698], ["mission", 32739], ["operations", 31167], ["fighter", 29713], ["united", 29583], ["training", 29079], ["system", 27502], ["airlines", 26439], ["tropical", 26179], 
["hurricane", 25897], ["service", 25349], ["pilots", 23754], ["flights", 23114], ["missile", 22827], ["crew", 21962], ["raf", 21869], ["control", 21739], ["group", 21450], ["ground", 21325], ["flew", 21136], ["landing", 20774], ["launch", 20689], ["radar", 20574], ["airline", 19017], ["plane", 18911], ["missions", 18601], ["time", 18264], ["three", 17917], ["damage", 17260], ["rocket", 16729], ["jet", 16662], ["military", 16334], ["center", 16317], ["international", 15841], ["unit", 15820], ["test", 15699], ["support", 15617], ["early", 15519] ], [ ["social", 117849], ["people", 104088], ["human", 96367], ["work", 76026], ["example", 75910], ["will", 75046], ["society", 70991], ["women", 66998], ["theory", 62441], ["life", 53499], ["nature", 47164], ["groups", 46975], ["movement", 46907], ["based", 46219], ["study", 45798], ["individual", 45676], ["time", 44975], ["culture", 43671], ["term", 43671], ["knowledge", 43549], ["view", 43517], ["process", 42888], ["political", 42806], ["form", 41585], ["common", 41406], ["concept", 40469], ["considered", 40453], ["development", 39545], ["person", 38333], ["modern", 37511], ["system", 37313], ["idea", 37139], ["change", 37119], ["ideas", 35671], ["approach", 35582], ["developed", 34599], ["individuals", 33850], ["group", 33834], ["sense", 33568], ["sexual", 33500], ["philosophy", 33365], ["practice", 32248], ["history", 32151], ["generally", 31362], ["experience", 30769], ["subject", 30594], ["cultural", 30274], ["influence", 30003], ["american", 29215], ["scientific", 29152] ], [ ["series", 65608], ["earth", 53229], ["time", 50167], ["power", 47313], ["battle", 39450], ["story", 36420], ["powers", 32912], ["planet", 32725], ["character", 32060], ["human", 31386], ["magic", 29620], ["appears", 29023], ["doctor", 27237], ["will", 27222], ["dark", 26562], ["man", 25171], ["evil", 24866], ["attack", 24804], ["characters", 24769], ["black", 24738], ["death", 24664], ["universe", 24600], ["form", 23619], ["body", 23146], 
["team", 22832], ["killed", 22507], ["comics", 22430], ["kill", 22355], ["eventually", 22240], ["original", 21975], ["war", 21342], ["fight", 21247], ["help", 20763], ["dragon", 20753], ["created", 20708], ["life", 20669], ["revealed", 20266], ["space", 20079], ["named", 20034], ["star", 19931], ["ship", 19925], ["called", 19653], ["control", 19524], ["find", 19494], ["group", 19049], ["alien", 18945], ["captain", 18606], ["powerful", 18572], ["order", 18050], ["return", 17992] ], [ ["india", 148568], ["indian", 91540], ["pakistan", 43810], ["temple", 37909], ["sri", 35298], ["khan", 30039], ["singh", 24930], ["district", 22364], ["tamil", 21134], ["delhi", 20166], ["hindu", 20009], ["famous", 19594], ["british", 19013], ["village", 18641], ["people", 17676], ["lanka", 17074], ["bangladesh", 15089], ["place", 14848], ["bengal", 14839], ["called", 14839], ["started", 14216], ["pradesh", 13552], ["punjab", 13256], ["kerala", 12910], ["government", 12611], ["mumbai", 12480], ["family", 12424], ["lord", 12266], ["shah", 11677], ["muslim", 10955], ["raja", 10531], ["guru", 10127], ["nepal", 10126], ["krishna", 9927], ["ali", 9782], ["indias", 9363], ["hindi", 9250], ["temples", 9131], ["bombay", 8845], ["son", 8751], ["pakistani", 8686], ["day", 8570], ["till", 8389], ["main", 8222], ["madras", 8119], ["major", 8119], ["gandhi", 8095], ["shiva", 8084], ["time", 7964], ["sabha", 7914] ], [ ["church", 379972], ["st", 171048], ["catholic", 107879], ["bishop", 80819], ["christian", 68886], ["saint", 68356], ["pope", 50786], ["churches", 50319], ["parish", 49519], ["roman", 48606], ["religious", 41677], ["cathedral", 41340], ["holy", 39495], ["john", 36213], ["diocese", 32853], ["christ", 31852], ["century", 30111], ["chapel", 29020], ["year", 27677], ["priest", 26000], ["paul", 25944], ["archbishop", 25759], ["bishops", 24389], ["monastery", 23467], ["mary", 23176], ["abbey", 23106], ["order", 23035], ["mission", 21333], ["orthodox", 21196], ["council", 21051], ["founded", 
19932], ["saints", 19454], ["faith", 19425], ["theology", 18786], ["rome", 18611], ["congregation", 18387], ["baptist", 18365], ["cardinal", 18137], ["seminary", 18002], ["protestant", 17006], ["jesus", 16969], ["time", 16921], ["years", 16178], ["cross", 16090], ["ordained", 15762], ["life", 15652], ["rev", 15449], ["theological", 15308], ["priests", 14920], ["missionary", 14743] ], [ ["time", 253091], ["years", 155361], ["people", 104756], ["day", 104464], ["death", 98956], ["began", 88619], ["police", 87021], ["left", 86479], ["led", 81651], ["days", 80684], ["claimed", 71065], ["despite", 70647], ["months", 65961], ["decided", 64082], ["case", 60374], ["men", 59711], ["continued", 59195], ["three", 59073], ["reported", 58914], ["stated", 57336], ["asked", 56990], ["prison", 56931], ["return", 55748], ["eventually", 54135], ["forced", 53807], ["refused", 53627], ["called", 52745], ["early", 52195], ["brought", 50917], ["set", 50116], ["met", 48529], ["result", 48261], ["told", 48232], ["trial", 47790], ["caused", 47731], ["life", 47473], ["killed", 46955], ["order", 46905], ["allowed", 46225], ["long", 46022], ["report", 45690], ["wanted", 45532], ["failed", 44595], ["fact", 44577], ["attempt", 44387], ["man", 43862], ["agreed", 43792], ["believed", 43738], ["received", 43719], ["returned", 42847] ], [ ["2009", 366418], ["2008", 363976], ["2007", 340817], ["2010", 340449], ["2006", 275341], ["1", 270064], ["september", 231466], ["june", 224598], ["october", 223378], ["january", 221942], ["july", 221320], ["march", 214129], ["november", 210514], ["december", 208958], ["april", 206148], ["2", 204552], ["august", 202367], ["2005", 196752], ["3", 175170], ["february", 172170], ["4", 157279], ["5", 154334], ["10", 150965], ["2004", 141553], ["announced", 132186], ["year", 127087], ["6", 124446], ["12", 121249], ["7", 120417], ["will", 119843], ["20", 113723], ["8", 112973], ["15", 112740], ["2003", 112621], ["11", 108367], ["9", 99739], ["30", 97553], ["16", 94235], 
["13", 94047], ["14", 92821], ["2002", 88687], ["2001", 82892], ["25", 82620], ["17", 82304], ["21", 78303], ["22", 76483], ["19", 74498], ["23", 74081], ["24", 70794], ["26", 69395] ], [ ["energy", 89905], ["water", 82925], ["power", 66159], ["gas", 56708], ["high", 45724], ["oil", 44152], ["chemical", 39157], ["process", 38258], ["nuclear", 37878], ["will", 35298], ["temperature", 34434], ["material", 33868], ["heat", 33328], ["plant", 32528], ["surface", 31283], ["pressure", 30483], ["materials", 28993], ["production", 27183], ["carbon", 26924], ["reaction", 26304], ["low", 24192], ["acid", 23823], ["light", 22965], ["current", 22556], ["form", 21664], ["produced", 20971], ["air", 20933], ["metal", 20628], ["fuel", 20144], ["system", 19647], ["mine", 19430], ["electric", 19429], ["produce", 19316], ["products", 19214], ["liquid", 19082], ["systems", 19012], ["hydrogen", 18912], ["large", 18659], ["enzyme", 18634], ["electricity", 18610], ["electrical", 18563], ["solar", 17620], ["iron", 17574], ["plants", 17297], ["example", 17011], ["natural", 16722], ["oxygen", 16647], ["higher", 16408], ["organic", 16400], ["flow", 15931] ], [ ["scotland", 62224], ["list", 55368], ["scottish", 52947], ["national", 51865], ["register", 33796], ["places", 30371], ["historic", 28995], ["listed", 28810], ["james", 24496], ["edinburgh", 23219], ["glasgow", 22937], ["hamilton", 19534], ["ross", 17636], ["david", 17374], ["campbell", 17112], ["scott", 17073], ["john", 15620], ["douglas", 15137], ["gordon", 14794], ["murray", 14600], ["stewart", 13584], ["alexander", 13207], ["robert", 13187], ["anderson", 12835], ["bruce", 11325], ["stamps", 10801], ["graham", 10195], ["aberdeen", 9808], ["andrews", 9476], ["watson", 9182], ["properties", 9056], ["post", 8765], ["clan", 8395], ["postal", 8385], ["scots", 8275], ["duncan", 8237], ["united", 7759], ["county", 7752], ["districts", 7548], ["highland", 7423], ["andrew", 7356], ["isle", 7263], ["glen", 7159], ["stamp", 7116], ["wallace", 
6982], ["cameron", 6955], ["burns", 6911], ["macdonald", 6876], ["year", 6845], ["included", 6723] ], [ ["system", 112467], ["data", 98808], ["software", 80744], ["systems", 80637], ["computer", 69581], ["users", 50524], ["code", 49057], ["network", 43536], ["internet", 43063], ["digital", 42273], ["user", 41382], ["technology", 41067], ["web", 39579], ["will", 37937], ["support", 36380], ["version", 35618], ["windows", 35469], ["access", 35283], ["applications", 33364], ["file", 31566], ["memory", 31465], ["application", 30358], ["mobile", 30310], ["control", 29927], ["standard", 29568], ["time", 29370], ["program", 29227], ["based", 28953], ["design", 27497], ["video", 27447], ["number", 26619], ["features", 26351], ["devices", 25250], ["service", 24458], ["project", 24424], ["device", 24401], ["allows", 24250], ["source", 23965], ["operating", 23881], ["computers", 23293], ["include", 23255], ["process", 22808], ["developed", 22475], ["development", 22002], ["content", 21905], ["microsoft", 21682], ["files", 21588], ["server", 21270], ["hardware", 21055], ["example", 21011] ], [ ["de", 212319], ["spanish", 94530], ["la", 75775], ["mexico", 64655], ["spain", 55565], ["el", 55072], ["san", 47334], ["del", 39534], ["brazil", 39453], ["wrestling", 32528], ["puerto", 32448], ["mexican", 32360], ["portuguese", 32195], ["juan", 30728], ["match", 26829], ["argentina", 26466], ["santa", 25341], ["jos", 24102], ["portugal", 23298], ["rio", 22341], ["brazilian", 22309], ["rico", 21972], ["city", 21453], ["los", 20305], ["latin", 20200], ["carlos", 20015], ["chile", 19658], ["america", 19504], ["antonio", 18782], ["da", 17048], ["madrid", 17022], ["tag", 16607], ["cuba", 16558], ["luis", 16150], ["las", 16136], ["american", 15534], ["pedro", 15490], ["colombia", 15458], ["peru", 15195], ["argentine", 14541], ["buenos", 14207], ["aires", 14185], ["francisco", 13820], ["manuel", 13669], ["title", 13498], ["venezuela", 13358], ["costa", 13326], ["began", 12172], ["main", 
11831], ["cuban", 11672] ], [ ["de", 236835], ["french", 202365], ["france", 138650], ["la", 86432], ["paris", 83054], ["italian", 80684], ["italy", 60821], ["le", 55637], ["dutch", 50800], ["van", 43969], ["des", 36817], ["du", 35568], ["louis", 34582], ["netherlands", 34128], ["di", 32325], ["belgium", 22104], ["les", 21407], ["jean", 19444], ["rome", 18344], ["belgian", 15469], ["pierre", 15268], ["milan", 14440], ["amsterdam", 14430], ["en", 13437], ["european", 13300], ["charles", 12309], ["il", 11477], ["jacques", 10905], ["napoleon", 10840], ["brussels", 10782], ["florence", 10540], ["naples", 10539], ["born", 10526], ["europe", 10480], ["grand", 9970], ["giovanni", 9592], ["spain", 9456], ["venice", 9372], ["holland", 8956], ["left", 8898], ["marie", 8530], ["famous", 8381], ["franois", 8293], ["san", 8095], ["del", 7911], ["della", 7890], ["henri", 7863], ["commune", 7857], ["lyon", 7575], ["named", 7506] ], [ ["won", 181752], ["team", 122397], ["race", 116450], ["event", 104584], ["championship", 81216], ["held", 72452], ["championships", 70070], ["second", 66475], ["competition", 63669], ["place", 58087], ["racing", 55343], ["final", 53941], ["three", 51909], ["grand", 51844], ["medal", 51297], ["series", 51006], ["tour", 50173], ["finished", 50073], ["olympic", 49302], ["gold", 48741], ["win", 48719], ["olympics", 48532], ["time", 48188], ["season", 45041], ["events", 44912], ["year", 44752], ["champion", 43662], ["best", 41662], ["international", 36648], ["test", 36112], ["winning", 36109], ["tournament", 35647], ["winner", 35552], ["third", 35337], ["competed", 34801], ["points", 34729], ["races", 34514], ["record", 33922], ["top", 32812], ["cricket", 32031], ["womens", 31763], ["summer", 31668], ["open", 30495], ["prix", 30425], ["games", 30369], ["silver", 30071], ["track", 30009], ["cup", 29733], ["teams", 28898], ["round", 28471] ], [ ["london", 188616], ["australia", 125600], ["england", 117346], ["australian", 106100], ["british", 100209], 
["south", 96541], ["wales", 77076], ["zealand", 68704], ["st", 53938], ["sydney", 52194], ["john", 48656], ["centre", 48005], ["royal", 47826], ["west", 45883], ["victoria", 43149], ["uk", 42222], ["melbourne", 39022], ["north", 36808], ["britain", 36585], ["council", 33376], ["local", 32867], ["oxford", 31555], ["manchester", 31278], ["sir", 29660], ["school", 29069], ["great", 28144], ["welsh", 27981], ["hill", 26377], ["war", 25474], ["east", 25095], ["george", 24434], ["years", 23911], ["cambridge", 23777], ["including", 23407], ["county", 23177], ["birmingham", 22708], ["yorkshire", 22380], ["english", 22041], ["borough", 21981], ["queensland", 21650], ["town", 21525], ["road", 21134], ["liverpool", 20615], ["kent", 20207], ["bristol", 19115], ["hall", 18610], ["formed", 18197], ["william", 17697], ["early", 17039], ["victorian", 16810] ], [ ["university", 463429], ["member", 137345], ["born", 134363], ["college", 121666], ["served", 117173], ["received", 109053], ["director", 105592], ["award", 103422], ["worked", 103347], ["years", 100862], ["professor", 95568], ["national", 95436], ["president", 89049], ["school", 87300], ["degree", 85310], ["institute", 78974], ["international", 66953], ["graduated", 65260], ["awarded", 63288], ["dr", 59801], ["association", 58365], ["career", 56992], ["attended", 56058], ["work", 55240], ["joined", 54174], ["studies", 51526], ["science", 51371], ["society", 51074], ["assistant", 49970], ["department", 48537], ["american", 48345], ["1972", 47973], ["1970", 47967], ["1980", 47806], ["1976", 47304], ["board", 46827], ["1968", 46734], ["appointed", 46721], ["1975", 46700], ["1979", 46583], ["1990", 46556], ["1973", 46504], ["studied", 46201], ["1978", 46106], ["1974", 46030], ["academy", 45998], ["1977", 45926], ["1985", 45800], ["1969", 45510], ["1984", 45365] ], [ ["area", 291810], ["town", 228390], ["city", 196667], ["population", 194023], ["village", 166037], ["district", 141399], ["people", 134300], ["land", 134236], 
["region", 127491], ["island", 126985], ["local", 121356], ["south", 120239], ["north", 116976], ["century", 98568], ["east", 88909], ["west", 87533], ["areas", 84905], ["located", 80980], ["community", 75808], ["towns", 68451], ["islands", 62252], ["small", 62094], ["municipality", 58992], ["river", 58226], ["settlement", 54627], ["cities", 53679], ["northern", 52990], ["large", 52005], ["largest", 51995], ["villages", 49592], ["government", 49564], ["established", 48278], ["rural", 44343], ["southern", 42714], ["residents", 42432], ["territory", 41947], ["major", 41053], ["county", 40317], ["number", 39993], ["urban", 39422], ["inhabitants", 38785], ["agricultural", 38404], ["country", 38281], ["communities", 36513], ["capital", 36254], ["main", 36179], ["western", 35873], ["province", 34868], ["districts", 34175], ["early", 33552] ], [ ["party", 282718], ["law", 243059], ["government", 211086], ["election", 206291], ["court", 184981], ["president", 156133], ["elected", 146881], ["council", 130785], ["general", 127154], ["minister", 123014], ["political", 107021], ["national", 104419], ["members", 101221], ["committee", 98626], ["united", 97465], ["office", 95109], ["federal", 87667], ["member", 85523], ["house", 84878], ["parliament", 82624], ["vote", 80810], ["public", 80383], ["elections", 76578], ["democratic", 73790], ["held", 72992], ["assembly", 72587], ["justice", 70258], ["rights", 69036], ["candidate", 68445], ["congress", 66559], ["senate", 66305], ["district", 63603], ["seat", 63331], ["constitution", 60225], ["secretary", 60148], ["republican", 59644], ["campaign", 57992], ["governor", 57466], ["legal", 57439], ["appointed", 56932], ["term", 56705], ["liberal", 56525], ["commission", 56499], ["supreme", 55591], ["votes", 53420], ["conservative", 50926], ["leader", 49861], ["parties", 49491], ["bill", 48521], ["police", 47747] ], [ ["york", 204770], ["county", 170218], ["american", 159428], ["united", 124701], ["city", 115571], ["washington", 102216], 
["john", 93869], ["texas", 86119], ["served", 85731], ["virginia", 82839], ["pennsylvania", 64529], ["war", 64076], ["moved", 62540], ["ohio", 61422], ["chicago", 60577], ["william", 59053], ["carolina", 57294], ["north", 56089], ["florida", 55904], ["illinois", 50975], ["george", 50905], ["james", 49954], ["died", 49153], ["massachusetts", 48767], ["president", 48574], ["named", 48408], ["jersey", 47805], ["born", 47060], ["boston", 46249], ["south", 45861], ["union", 44879], ["west", 43781], ["company", 43299], ["georgia", 41809], ["smith", 41778], ["began", 41438], ["michigan", 40164], ["fort", 39219], ["years", 38873], ["philadelphia", 38871], ["white", 38815], ["missouri", 35834], ["maryland", 35661], ["america", 34825], ["black", 33697], ["tennessee", 33559], ["indiana", 32860], ["st", 32376], ["jr", 32009], ["johnson", 31714] ], [ ["century", 124400], ["king", 111104], ["roman", 78855], ["empire", 73842], ["greek", 67390], ["bc", 58607], ["ancient", 49778], ["emperor", 49372], ["ii", 48992], ["kingdom", 48498], ["period", 45001], ["battle", 39897], ["city", 39562], ["time", 35244], ["great", 34437], ["war", 31828], ["ad", 30236], ["early", 27546], ["reign", 27366], ["kings", 27205], ["iii", 27189], ["son", 26791], ["rule", 25766], ["power", 25517], ["greece", 25443], ["army", 25238], ["centuries", 24801], ["dynasty", 24065], ["rome", 23997], ["modern", 23165], ["history", 20624], ["imperial", 20453], ["medieval", 19869], ["death", 19778], ["ottoman", 19660], ["years", 19434], ["led", 19132], ["byzantine", 18137], ["defeated", 17534], ["ruled", 17140], ["year", 17062], ["throne", 16521], ["athens", 16421], ["capital", 16348], ["castle", 16320], ["military", 16163], ["late", 15897], ["iv", 15474], ["middle", 15394], ["control", 14896] ], [ ["engine", 106344], ["car", 86013], ["design", 71867], ["model", 61157], ["cars", 60135], ["production", 51957], ["built", 51884], ["engines", 47808], ["vehicle", 45526], ["class", 44905], ["models", 42450], ["speed", 
41574], ["vehicles", 41047], ["designed", 40748], ["produced", 37386], ["power", 33408], ["front", 32909], ["system", 32136], ["version", 32103], ["type", 31726], ["series", 31444], ["motor", 30334], ["rear", 30329], ["standard", 29624], ["gun", 28957], ["company", 28028], ["introduced", 25959], ["range", 25069], ["ford", 25015], ["sold", 24536], ["fuel", 21898], ["drive", 21660], ["wheel", 21509], ["tank", 21361], ["fitted", 20254], ["factory", 20253], ["machine", 19589], ["developed", 19383], ["based", 19316], ["replaced", 18844], ["wheels", 18649], ["time", 18463], ["powered", 18070], ["small", 17800], ["high", 17446], ["weight", 17020], ["electric", 16986], ["body", 16628], ["mounted", 16372], ["early", 16244] ], [ ["war", 295245], ["army", 266723], ["military", 143905], ["forces", 142815], ["battle", 134711], ["force", 111038], ["british", 110228], ["command", 105739], ["general", 103690], ["navy", 101416], ["ship", 100403], ["division", 99686], ["ships", 95391], ["troops", 82337], ["corps", 80884], ["service", 80655], ["naval", 75172], ["regiment", 74964], ["commander", 72783], ["infantry", 68989], ["attack", 66213], ["men", 64022], ["officer", 62600], ["fleet", 59858], ["soldiers", 58431], ["units", 55498], ["officers", 54634], ["operations", 53533], ["unit", 53380], ["june", 51211], ["august", 50803], ["brigade", 50271], ["july", 50067], ["fire", 49368], ["training", 49331], ["march", 48720], ["battalion", 48554], ["april", 48485], ["operation", 48099], ["captain", 47971], ["september", 47352], ["three", 45886], ["enemy", 45104], ["united", 43013], ["october", 42618], ["sea", 41631], ["royal", 41548], ["german", 41104], ["marine", 40895], ["major", 40763] ], [ ["son", 167725], ["died", 146431], ["married", 128307], ["family", 123107], ["king", 111969], ["daughter", 105251], ["john", 93901], ["death", 87804], ["william", 86856], ["father", 85020], ["born", 81388], ["wife", 80860], ["royal", 79052], ["ireland", 78714], ["irish", 76756], ["henry", 75696], 
["house", 73119], ["lord", 68289], ["charles", 68138], ["sir", 67619], ["prince", 64874], ["brother", 64076], ["children", 61342], ["england", 59207], ["queen", 57108], ["duke", 56660], ["thomas", 52106], ["years", 49872], ["marriage", 45259], ["george", 44609], ["earl", 43289], ["edward", 42847], ["english", 42269], ["second", 39571], ["elizabeth", 38506], ["sons", 36878], ["mary", 36632], ["james", 36472], ["mother", 36003], ["appointed", 35157], ["year", 31700], ["dublin", 31063], ["lady", 30127], ["title", 29992], ["great", 29772], ["succeeded", 29558], ["robert", 29173], ["ii", 28901], ["member", 28894], ["castle", 28571] ], [ ["season", 256538], ["team", 216040], ["game", 178379], ["league", 134385], ["games", 132121], ["played", 101956], ["coach", 92610], ["football", 87033], ["record", 78102], ["teams", 74960], ["baseball", 65873], ["field", 62567], ["year", 62495], ["second", 59618], ["career", 58813], ["play", 57694], ["basketball", 56620], ["hockey", 56094], ["three", 54372], ["yards", 51744], ["won", 48262], ["bowl", 47535], ["points", 44995], ["win", 44803], ["series", 44746], ["player", 44170], ["head", 43803], ["conference", 43753], ["championship", 43112], ["seasons", 42888], ["players", 41676], ["draft", 41300], ["high", 40701], ["time", 39616], ["named", 37883], ["national", 37642], ["led", 37545], ["nfl", 37529], ["third", 36419], ["major", 36305], ["finished", 33991], ["stadium", 32681], ["division", 32508], ["lead", 31697], ["playing", 30923], ["ncaa", 29967], ["history", 29672], ["runs", 29525], ["touchdown", 29347], ["signed", 29002] ], [ ["species", 218072], ["family", 65492], ["birds", 55986], ["small", 48097], ["long", 43737], ["large", 38060], ["animals", 35579], ["bird", 34415], ["plants", 33186], ["genus", 32612], ["plant", 30935], ["natural", 30686], ["habitat", 30079], ["tree", 30041], ["fish", 28678], ["tropical", 27964], ["white", 26524], ["black", 25570], ["order", 25511], ["leaves", 24934], ["brown", 23957], ["common", 21978], 
["forests", 21929], ["trees", 21513], ["animal", 20610], ["flowers", 20003], ["eggs", 19524], ["worldwide", 19021], ["feed", 18813], ["occur", 18520], ["subtropical", 18249], ["wild", 17362], ["length", 16602], ["male", 16511], ["breeding", 16462], ["habitats", 16348], ["range", 16272], ["food", 16219], ["female", 15961], ["fruit", 15662], ["short", 15442], ["insects", 15343], ["endemic", 15230], ["forest", 14960], ["group", 14842], ["including", 14680], ["include", 14587], ["moist", 14567], ["threatened", 14443], ["tail", 14414] ], [ ["art", 221966], ["museum", 124132], ["work", 100651], ["works", 65369], ["artists", 60161], ["collection", 56623], ["design", 55177], ["arts", 54633], ["painting", 49171], ["artist", 43694], ["gallery", 42139], ["paintings", 36760], ["exhibition", 34067], ["style", 32498], ["fine", 26393], ["including", 26002], ["painted", 24375], ["architecture", 24160], ["york", 21498], ["fashion", 20628], ["painter", 20611], ["life", 20555], ["early", 20091], ["created", 19733], ["sculpture", 19691], ["artistic", 19141], ["history", 19083], ["contemporary", 19063], ["collections", 18987], ["years", 18082], ["museums", 18003], ["worked", 17574], ["images", 17546], ["time", 17525], ["photography", 17482], ["figures", 17133], ["academy", 16423], ["exhibitions", 16098], ["modern", 15538], ["portrait", 15478], ["photographs", 15308], ["began", 15089], ["studio", 14463], ["drawing", 14370], ["include", 13926], ["exhibited", 13866], ["produced", 13707], ["designed", 13658], ["period", 13429], ["visual", 13335] ], [ ["white", 96740], ["red", 81635], ["black", 78110], ["blue", 65174], ["called", 42832], ["color", 41713], ["will", 36576], ["head", 36007], ["green", 35511], ["gold", 35229], ["side", 34744], ["small", 34412], ["hand", 33632], ["long", 32262], ["arms", 30112], ["top", 29926], ["flag", 29799], ["horse", 29723], ["wear", 29421], ["silver", 28948], ["common", 28709], ["light", 27650], ["dog", 27358], ["wood", 25393], ["body", 25251], ["type", 
25059], ["large", 25005], ["yellow", 24524], ["form", 23338], ["worn", 22795], ["dogs", 22590], ["cut", 22197], ["popular", 22133], ["left", 21901], ["generally", 21826], ["traditional", 21543], ["ball", 21220], ["front", 20991], ["horses", 20849], ["shape", 20445], ["hair", 20397], ["feet", 19597], ["colors", 19549], ["time", 18692], ["coat", 18489], ["three", 18123], ["typically", 17728], ["modern", 17695], ["face", 17646], ["cross", 17455] ], [ ["school", 797694], ["students", 322211], ["university", 223060], ["high", 215654], ["college", 215126], ["schools", 213090], ["education", 209030], ["year", 120280], ["program", 119598], ["student", 111419], ["campus", 91134], ["community", 78216], ["programs", 74522], ["training", 68184], ["center", 65221], ["members", 64165], ["science", 63324], ["national", 62197], ["years", 61502], ["public", 60020], ["academic", 57457], ["association", 53148], ["courses", 48179], ["arts", 47300], ["educational", 47263], ["include", 46719], ["class", 46284], ["institute", 46084], ["department", 45032], ["teachers", 44371], ["colleges", 42472], ["classes", 42407], ["offers", 41769], ["activities", 41247], ["universities", 40298], ["district", 39492], ["engineering", 38781], ["learning", 38515], ["founded", 38485], ["faculty", 38255], ["girls", 38064], ["sports", 37555], ["children", 37484], ["boys", 37462], ["international", 37262], ["board", 36855], ["teaching", 36291], ["academy", 36168], ["secondary", 36151], ["established", 35640] ], [ ["album", 446364], ["band", 330220], ["song", 286320], ["released", 257516], ["music", 220855], ["songs", 160722], ["single", 137848], ["records", 125041], ["recorded", 118398], ["rock", 105713], ["bands", 93876], ["release", 89818], ["live", 88207], ["tour", 87917], ["video", 86282], ["record", 84378], ["albums", 83031], ["label", 80581], ["group", 80131], ["recording", 73915], ["guitar", 66461], ["track", 62417], ["cover", 61590], ["version", 61032], ["tracks", 60467], ["number", 59460], 
["featured", 59052], ["time", 59033], ["chart", 57046], ["hit", 56365], ["uk", 54638], ["top", 53980], ["performed", 53958], ["studio", 53541], ["played", 53173], ["singles", 50390], ["sound", 47823], ["love", 46189], ["pop", 44820], ["artist", 44742], ["solo", 43783], ["cd", 43642], ["debut", 43475], ["singer", 42952], ["artists", 42801], ["members", 40864], ["included", 39588], ["early", 38715], ["second", 38353], ["bass", 37997] ], [ ["radio", 183618], ["station", 137115], ["news", 111480], ["television", 93354], ["channel", 88974], ["broadcast", 70195], ["stations", 68950], ["network", 65774], ["media", 60717], ["tv", 58102], ["broadcasting", 46294], ["time", 40693], ["format", 40519], ["local", 40496], ["program", 35684], ["bbc", 35218], ["programming", 33265], ["live", 32987], ["fm", 30534], ["morning", 29192], ["host", 29155], ["began", 28818], ["sports", 27645], ["fox", 26656], ["air", 26478], ["cable", 26270], ["call", 25763], ["hosted", 24746], ["coverage", 24088], ["music", 23877], ["pm", 22354], ["sunday", 21791], ["daily", 21756], ["channels", 21636], ["digital", 21065], ["abc", 20952], ["aired", 20886], ["changed", 20429], ["current", 20200], ["launched", 20098], ["communications", 20097], ["programme", 19888], ["day", 19757], ["broadcasts", 19295], ["moved", 19072], ["cbs", 18958], ["years", 18620], ["saturday", 18126], ["talk", 17806], ["night", 17600] ], [ ["age", 386369], ["18", 228156], ["population", 222305], ["income", 172578], ["average", 168517], ["years", 167212], ["median", 154814], ["living", 149471], ["65", 125600], ["males", 124300], ["females", 122617], ["households", 121379], ["100", 109446], ["family", 105354], ["people", 99715], ["families", 98724], ["older", 97900], ["town", 95590], ["size", 95179], ["city", 88956], ["household", 88380], ["miles", 88082], ["density", 86183], ["american", 73743], ["township", 72828], ["total", 71709], ["area", 71054], ["county", 69734], ["races", 67846], ["census", 62004], ["2000", 59614], ["square", 
55762], ["45", 55584], ["25", 55308], ["64", 54827], ["children", 54479], ["24", 52937], ["44", 52845], ["white", 52734], ["female", 49373], ["land", 48571], ["including", 48458], ["units", 48242], ["housing", 48034], ["bureau", 47910], ["individuals", 47648], ["located", 47004], ["poverty", 46279], ["united", 45975], ["village", 44825] ], [ ["music", 246693], ["musical", 63951], ["opera", 63270], ["festival", 59822], ["orchestra", 59570], ["dance", 55452], ["performed", 52677], ["jazz", 43862], ["piano", 41471], ["theatre", 37764], ["performance", 34873], ["works", 34382], ["concert", 32349], ["symphony", 30688], ["composer", 29121], ["played", 26756], ["performances", 26516], ["instruments", 26372], ["musicians", 24682], ["classical", 24321], ["including", 24052], ["work", 23426], ["composed", 23187], ["major", 21898], ["singing", 21731], ["songs", 20068], ["folk", 20029], ["instrument", 19937], ["ballet", 19630], ["composition", 19456], ["composers", 19333], ["play", 19138], ["performing", 18902], ["concerts", 18852], ["playing", 18293], ["stage", 18078], ["years", 17992], ["include", 17931], ["popular", 17256], ["choir", 17243], ["ensemble", 17238], ["sound", 17231], ["style", 17011], ["time", 16361], ["violin", 15833], ["hall", 15628], ["piece", 15272], ["chamber", 15186], ["recordings", 15120], ["string", 15028] ], [ ["war", 139497], ["russian", 109040], ["soviet", 103128], ["union", 102373], ["russia", 68357], ["government", 66993], ["republic", 62877], ["political", 58416], ["party", 58152], ["polish", 57559], ["communist", 51884], ["national", 48571], ["european", 45873], ["poland", 45447], ["military", 44126], ["international", 41758], ["foreign", 38543], ["moscow", 37234], ["united", 36736], ["movement", 36530], ["countries", 34988], ["europe", 33844], ["country", 31205], ["treaty", 30290], ["socialist", 29581], ["revolution", 29252], ["group", 28381], ["independence", 26686], ["nations", 26537], ["members", 25364], ["workers", 24975], ["germany", 
23727], ["peoples", 23038], ["army", 22667], ["central", 22540], ["peace", 21535], ["romania", 21501], ["ukraine", 21310], ["forces", 20784], ["ii", 20712], ["ukrainian", 20523], ["relations", 20441], ["german", 19947], ["organization", 19877], ["romanian", 19848], ["led", 19485], ["formed", 19175], ["serbian", 17856], ["revolutionary", 17565], ["groups", 17469] ], [ ["canada", 151475], ["canadian", 112362], ["california", 111394], ["san", 109557], ["ontario", 52500], ["toronto", 48493], ["los", 44998], ["francisco", 43748], ["angeles", 43445], ["quebec", 33237], ["city", 32182], ["philippines", 31211], ["montreal", 28186], ["provincial", 25292], ["vancouver", 23755], ["british", 23011], ["columbia", 22401], ["bay", 22286], ["santa", 21405], ["alberta", 20061], ["malaysia", 19542], ["pacific", 18844], ["ottawa", 18720], ["diego", 17902], ["province", 17861], ["nova", 17821], ["philippine", 17469], ["indonesia", 16269], ["named", 15705], ["riding", 14119], ["canadas", 13937], ["manila", 13401], ["manitoba", 12963], ["scotia", 12505], ["saskatchewan", 11114], ["winnipeg", 10871], ["calgary", 10425], ["indonesian", 10407], ["brunswick", 9661], ["edmonton", 9244], ["filipino", 9193], ["newfoundland", 8506], ["jose", 8345], ["centre", 8332], ["malaysian", 8203], ["north", 8086], ["halifax", 7827], ["currently", 7430], ["malay", 7351], ["provinces", 7191] ], [ ["building", 281354], ["built", 220409], ["house", 166902], ["park", 166415], ["city", 156863], ["site", 147300], ["street", 138217], ["located", 124267], ["buildings", 113201], ["area", 95684], ["construction", 87967], ["hall", 83058], ["opened", 79477], ["center", 71073], ["tower", 69313], ["main", 67615], ["houses", 66295], ["large", 63898], ["original", 63313], ["historic", 61372], ["public", 59065], ["hotel", 57774], ["stone", 51095], ["designed", 50777], ["centre", 50729], ["century", 50349], ["place", 49442], ["constructed", 48599], ["room", 47931], ["square", 47850], ["fire", 45655], ["location", 44816], 
["side", 44128], ["floor", 43495], ["open", 40587], ["garden", 39694], ["local", 39407], ["small", 39012], ["office", 38976], ["originally", 38476], ["hill", 38361], ["wall", 37613], ["store", 37573], ["structure", 36982], ["completed", 36398], ["property", 36232], ["closed", 35914], ["including", 34877], ["today", 34082], ["nearby", 33618] ], [ ["film", 469340], ["series", 222043], ["films", 134231], ["television", 116992], ["role", 105450], ["movie", 99630], ["appeared", 97475], ["best", 88219], ["episode", 87383], ["theatre", 73971], ["production", 73493], ["award", 71857], ["played", 68464], ["tv", 61091], ["episodes", 59692], ["character", 58376], ["directed", 57375], ["actor", 56600], ["director", 51701], ["cast", 51028], ["star", 50060], ["comedy", 49963], ["play", 47832], ["awards", 47593], ["including", 47518], ["produced", 45696], ["season", 42907], ["featured", 42380], ["festival", 42374], ["drama", 40069], ["york", 39971], ["original", 39771], ["actress", 38614], ["stage", 38445], ["movies", 36284], ["roles", 35660], ["acting", 35436], ["released", 35406], ["hollywood", 35077], ["characters", 34844], ["actors", 34108], ["won", 34068], ["career", 33768], ["starred", 33640], ["starring", 33618], ["feature", 33386], ["company", 33374], ["john", 32625], ["dvd", 31721], ["producer", 31058] ], [ ["game", 242557], ["player", 112707], ["games", 105728], ["players", 80679], ["will", 52340], ["play", 42400], ["version", 38410], ["cards", 35149], ["card", 34870], ["released", 33341], ["2", 28870], ["points", 26591], ["time", 25853], ["three", 25500], ["level", 24051], ["series", 23436], ["number", 23335], ["characters", 23140], ["character", 21261], ["video", 20544], ["original", 20520], ["chess", 19964], ["set", 19842], ["3", 18092], ["mode", 17844], ["features", 16443], ["played", 16115], ["move", 15657], ["based", 15613], ["called", 15605], ["rules", 15571], ["score", 15249], ["release", 14968], ["final", 14367], ["main", 13939], ["levels", 13663], ["items", 
13360], ["special", 13332], ["board", 13288], ["online", 12861], ["system", 12849], ["playing", 12531], ["beer", 12230], ["super", 12025], ["order", 11532], ["nintendo", 11363], ["playstation", 11247], ["include", 11131], ["gameplay", 11009], ["versions", 10914] ], [ ["language", 149361], ["english", 92811], ["word", 79375], ["languages", 68906], ["names", 56480], ["term", 47898], ["example", 46912], ["form", 43898], ["meaning", 38166], ["common", 36463], ["number", 32288], ["latin", 27480], ["written", 26917], ["list", 25071], ["called", 23330], ["spoken", 23085], ["text", 22672], ["forms", 22410], ["refer", 21825], ["modern", 20685], ["letters", 20633], ["letter", 20288], ["standard", 19483], ["speakers", 17581], ["include", 16572], ["case", 16026], ["derived", 15405], ["referred", 14955], ["dialect", 14896], ["note", 14794], ["system", 14469], ["refers", 14460], ["considered", 14424], ["numbers", 14305], ["generally", 14191], ["person", 13218], ["official", 13202], ["origin", 12983], ["phrase", 12851], ["characters", 12821], ["three", 12738], ["examples", 12645], ["dialects", 12625], ["commonly", 12220], ["based", 12212], ["original", 11929], ["alphabet", 11877], ["century", 11845], ["second", 11649], ["usage", 11514] ], [ ["medical", 81136], ["health", 62222], ["patients", 52027], ["hospital", 50570], ["disease", 50131], ["treatment", 47234], ["cells", 44762], ["blood", 41010], ["medicine", 35521], ["cell", 35353], ["cancer", 33582], ["drug", 31790], ["care", 29153], ["brain", 27701], ["protein", 25276], ["clinical", 24232], ["patient", 23703], ["human", 23609], ["body", 23564], ["gene", 23558], ["surgery", 23230], ["effects", 22509], ["dna", 21575], ["study", 21470], ["cases", 21286], ["drugs", 20795], ["symptoms", 20597], ["include", 20222], ["studies", 19473], ["risk", 18956], ["heart", 18831], ["system", 18779], ["therapy", 18259], ["associated", 18166], ["diseases", 17413], ["pain", 16328], ["syndrome", 16059], ["including", 15603], ["common", 15528], 
["proteins", 15371], ["levels", 14738], ["tissue", 14680], ["skin", 14437], ["muscle", 14436], ["infection", 13986], ["genetic", 13698], ["activity", 13262], ["bone", 13248], ["genes", 12996], ["called", 12992] ], [ ["club", 291842], ["team", 241041], ["league", 239635], ["season", 219303], ["played", 197371], ["cup", 178327], ["football", 159512], ["match", 114696], ["final", 108423], ["won", 100308], ["division", 87234], ["clubs", 81631], ["teams", 81039], ["second", 80340], ["matches", 75647], ["championship", 68096], ["scored", 67743], ["goals", 65912], ["play", 65408], ["playing", 63517], ["round", 63266], ["players", 62152], ["win", 60486], ["game", 59595], ["side", 59044], ["united", 56338], ["national", 55634], ["games", 55024], ["fc", 54618], ["goal", 54103], ["time", 53022], ["career", 52535], ["debut", 50146], ["year", 48994], ["player", 48653], ["rugby", 47338], ["three", 47010], ["tournament", 45088], ["years", 43663], ["manager", 41932], ["stadium", 40446], ["top", 39879], ["champions", 39613], ["joined", 38914], ["lost", 38881], ["signed", 38388], ["city", 35640], ["competition", 34291], ["premier", 33871], ["winning", 33843] ], [ ["published", 256420], ["book", 250232], ["books", 140892], ["work", 110416], ["wrote", 107171], ["magazine", 85501], ["writing", 77030], ["written", 76898], ["works", 75120], ["novel", 74155], ["history", 66926], ["author", 61923], ["story", 60417], ["stories", 60383], ["life", 55012], ["editor", 53731], ["literature", 51738], ["edition", 51666], ["press", 51634], ["poetry", 50407], ["literary", 47623], ["journal", 46512], ["publication", 46430], ["writer", 45833], ["times", 42528], ["articles", 42371], ["newspaper", 41887], ["publishing", 40501], ["english", 40033], ["writers", 39788], ["library", 38689], ["years", 37018], ["paper", 36787], ["collection", 35651], ["short", 34783], ["series", 34601], ["fiction", 33783], ["novels", 33268], ["review", 32437], ["time", 32015], ["poems", 31090], ["including", 29809], ["poem", 
28743], ["early", 28501], ["issue", 27897], ["authors", 27552], ["poet", 27213], ["publications", 26996], ["volume", 26726], ["article", 26539] ], [ ["german", 172197], ["germany", 121131], ["von", 58106], ["berlin", 49291], ["war", 41302], ["der", 36579], ["1945", 29360], ["austria", 27860], ["vienna", 25456], ["ii", 23359], ["austrian", 21004], ["die", 20967], ["swiss", 20006], ["switzerland", 18646], ["nazi", 18459], ["und", 18176], ["hungarian", 18163], ["czech", 17813], ["munich", 16825], ["1933", 15681], ["hitler", 15400], ["prague", 15064], ["hungary", 15010], ["prussia", 14830], ["1938", 14611], ["1939", 14100], ["karl", 13856], ["hamburg", 13457], ["1936", 12530], ["founded", 12390], ["wilhelm", 12008], ["otto", 11766], ["died", 11724], ["1934", 11525], ["history", 11496], ["1935", 11044], ["prussian", 11032], ["ss", 11017], ["hans", 10920], ["friedrich", 10895], ["born", 10669], ["frankfurt", 10594], ["johann", 10121], ["franz", 10023], ["1930", 9937], ["1932", 9806], ["east", 9756], ["1937", 9748], ["1940", 9535], ["pomerania", 9424] ] ]} ================================================ FILE: toolkits/topic_modeling/lda_sequential_cgs.cpp ================================================ /* * Copyright (c) 2009 Carnegie Mellon University. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. 
* * */ #include #include #include #include #include #include #include #include #include #include #include typedef uint32_t word_id_type; typedef uint32_t doc_id_type; typedef uint16_t topic_id_type; typedef uint32_t count_type; #define NULL_TOPIC topic_id_type(-1) struct token_type { word_id_type word; doc_id_type doc; token_type(const word_id_type& word = 0, const doc_id_type& doc = 0) : word(word), doc(doc) { } }; std::ostream& operator<<(std::ostream& out, const token_type& tok) { return out << "(" << tok.word << ", " << tok.doc << ")"; } struct corpus_type { size_t nwords, ndocs, ntokens; std::vector< token_type > tokens; std::vector dictionary; std::vector< word_id_type > ntokens_in_doc; corpus_type(const std::string& dictionary_fname, const std::string& counts_fname ) : nwords(0), ndocs(0), ntokens(0) { dictionary.reserve(20000); ntokens_in_doc.reserve(5000); tokens.reserve(100000); load_dictionary(dictionary_fname); load_counts(counts_fname); } void load_dictionary(const std::string& fname) { std::ifstream fin(fname.c_str()); std::string str; while(fin.good()) { std::getline(fin, str); if(fin.good()) { dictionary.push_back(str); nwords++; } } fin.close(); } void load_counts(const std::string& fname) { std::ifstream fin(fname.c_str()); while(fin.good()) { // Read a collection of tokens const size_t NULL_VALUE(-1); size_t word = NULL_VALUE, doc = NULL_VALUE, count = NULL_VALUE; fin >> doc >> word >> count; if(fin.good()) { assert(word != NULL_VALUE && doc != NULL_VALUE && count != NULL_VALUE); // update the doc counter ndocs = std::max(ndocs, doc + 1); // Assert valid word assert(word < nwords); // Update the words in document counter if(doc >= ntokens_in_doc.size()) ntokens_in_doc.resize(doc+1, 0); ntokens_in_doc[doc] += count; // Add all the tokens token_type tok; tok.word = word; tok.doc = doc; for(size_t i = 0; i < count; ++i) tokens.push_back(tok); } } fin.close(); ntokens = tokens.size(); } // end of load counts void shuffle_tokens() { 
graphlab::random::shuffle(tokens); } }; // end of corpus template class matrix { private: size_t _rows, _cols; std::vector data; const size_t linear_index(const size_t& i, const size_t& j) const { assert(i < _rows && j < _cols); return i + j * _rows; } public: matrix(const size_t& rows, const size_t& cols, const T& zero = T(0)) : _rows(rows), _cols(cols), data(rows*cols, zero) { }; const T& operator()(const size_t& i, const size_t& j) const { return data[linear_index(i,j)]; } size_t rows() const { return _rows; } size_t cols() const { return _cols; } T& operator()(const size_t& i, const size_t& j) { return data[linear_index(i,j)]; } const T& operator()(const size_t& i) const { assert(i < data.size()); return data[i]; } T& operator()(const size_t& i) { assert(i < data.size()); return data[i]; } void zeros() { std::fill(data.begin(), data.end(), T(0)); } void operator+=(const matrix& other) { assert(_rows == other._rows); assert(_cols == other._cols); for(size_t i = 0; i < data.size(); ++i) data[i] += other.data[i]; } T sum() const { T z(0); for(size_t i = 0; i < data.size(); ++i) z += data[i]; return z; } }; // end of matrix typedef matrix mat_type; class collapsed_gibbs { public: const corpus_type* corpus_ptr; const size_t ntopics; const double alpha, beta; std::vector< topic_id_type > topics; //! n_td(t,d) Number of occurences of topic t in document d mat_type n_td; //! n_wt(w,t) Number of occurences of word w in topic t mat_type n_wt; //! n_t(t) The total number of words assigned to topic t mat_type n_t; //! 
number of times a token was assigned to a new topic size_t nchanges; collapsed_gibbs(const corpus_type& corpus, const size_t& ntopics, const double& alpha, const double& beta) : corpus_ptr(&corpus), ntopics(ntopics), alpha(alpha), beta(beta), topics(corpus.ntokens, NULL_TOPIC), n_td(ntopics, corpus.ndocs, 0), n_wt(corpus.nwords, ntopics, 0), n_t(ntopics, 1, 0), nchanges(0) { } void iterate() { assert(corpus_ptr != NULL); const corpus_type& corpus = *corpus_ptr; // Reset the number of changes nchanges = 0; std::vector conditional(ntopics); // Loop over all the tokens for(size_t i = 0; i < corpus.ntokens; ++i) { // Get the word and document for the ith token const word_id_type w = corpus.tokens[i].word; const doc_id_type d = corpus.tokens[i].doc; const topic_id_type old_topic = topics[i]; // Remove the word from the current counters if(old_topic != NULL_TOPIC) { --n_td(old_topic, d); --n_wt(w, old_topic), --n_t(old_topic); } // Construct the conditional double normalizer = 0; for(size_t t = 0; t < ntopics; ++t) { conditional[t] = (alpha + n_td(t,d)) * (beta + n_wt(w,t)) / (beta * corpus.nwords + n_t(t)); normalizer += conditional[t]; } assert(normalizer > 0); // Draw a new value topic_id_type new_topic = 0; // normalize and then sample for(size_t t = 0; t < ntopics; ++t) conditional[t] /= normalizer; new_topic = graphlab::random::multinomial(conditional); // Update the topic assignment and counters topics[i] = new_topic; if(new_topic != old_topic) nchanges++; ++n_td(new_topic, d); ++n_wt(w, new_topic), ++n_t(new_topic); } // end of loop over tokens const size_t n_td_sum = n_td.sum(); const size_t n_wt_sum = n_wt.sum(); const size_t n_t_sum = n_t.sum(); assert(n_td_sum == corpus.ntokens); assert(n_wt_sum == corpus.ntokens); assert(n_t_sum == corpus.ntokens); } }; double log_likelihood(const double& alpha, const double& beta, const mat_type& n_td, const mat_type& n_wt) { using boost::math::lgamma; const size_t ndocs = n_td.cols(); const size_t ntopics = n_td.rows(); 
const size_t nwords = n_wt.rows(); mat_type n_t(ntopics,1, 0); for(size_t t = 0; t < n_wt.cols(); ++t) for(size_t w = 0; w < n_wt.rows(); ++w) n_t(t) += n_wt(w,t); // Matlab Functions: // // llik_w_given_z = ... // ntopics * (gammaln(nwords * beta) - nwords * gammaln(beta)) + ... // sum((sum(gammaln(n_wt + beta)) - gammaln( sum(n_wt) + nwords*beta))); // // llik_z = ... // ndocs * (gammaln(ntopics * alpha) - ntopics * gammaln(alpha)) + ... // sum(sum(gammaln(n_td + alpha)) - gammaln(sum(n_td) + ntopics * alpha)); double llik_words_given_topics = ntopics * (lgamma(nwords * beta) - nwords * lgamma(beta)); for(size_t t = 0; t < ntopics; ++t) { for(size_t w = 0; w < nwords; ++w) { llik_words_given_topics += lgamma(n_wt(w,t) + beta); } llik_words_given_topics -= lgamma(n_t(t) + nwords * beta); } double llik_topics = ndocs * (lgamma(ntopics * alpha) - ntopics * lgamma(alpha)); for(size_t d = 0; d < ndocs; ++d) { size_t ntokens_in_doc = 0; for(size_t t = 0; t < ntopics; ++t) { llik_topics += lgamma(n_td(t,d) + alpha); ntokens_in_doc += n_td(t,d); } llik_topics -= lgamma(ntokens_in_doc + ntopics * alpha); } return llik_words_given_topics + llik_topics; } // end of log_likelihood void display_top(const corpus_type& corpus, const mat_type& n_wt, const size_t& ntop) { assert(ntop > 0); const size_t nwords = n_wt.rows(); const size_t ntopics = n_wt.cols(); typedef std::pair cw_pair_type; for(size_t t = 0; t < ntopics; ++t) { std::set< cw_pair_type > top_words; for(size_t w = 0; w < nwords; ++w) { if(top_words.size() < ntop || n_wt(w,t) > top_words.begin()->first) { top_words.insert(std::make_pair(n_wt(w,t), w)); if(top_words.size() > ntop) top_words.erase(top_words.begin()); } } std::cout << std::endl; rev_foreach(const cw_pair_type& pair, top_words) { std::cout << corpus.dictionary.at(pair.second) << ", "; } std::cout << std::endl; } } // end of display top int main(int argc, char** argv) { std::string dictionary_fname("dictionary.txt"); std::string 
counts_fname("counts.tsv"); size_t ntopics(50); size_t nburnin(50); size_t nsamples(10); double alpha(50.0/double(ntopics)); double beta(0.1); size_t topk(20); std::string llik_fname("llik.txt"); std::string doctop_fname("doctop.txt"); std::string wordtop_fname("wordtop.txt"); // Parse command line options namespace po = boost::program_options; po::options_description desc("LDA sampler code"); desc.add_options() ("help", "produce help message") ("dictionary", po::value(&dictionary_fname)-> default_value(dictionary_fname), "Dictionary file") ("counts", po::value(&counts_fname)-> default_value(counts_fname), "Counts file") ("ntopics", po::value(&ntopics)-> default_value(ntopics), "Number of topics") ("nburnin", po::value(&nburnin)-> default_value(nburnin), "Number of iterations") ("nsamples", po::value(&nsamples)-> default_value(nsamples), "Number of iterations") ("alpha", po::value(&alpha)-> default_value(alpha), "Alpha prior") ("beta", po::value(&beta)-> default_value(beta), "Beta prior") ("doctop_fname", po::value(&doctop_fname)-> default_value(doctop_fname), "doctop_fname") ("wordtop_fname", po::value(&wordtop_fname)-> default_value(wordtop_fname), "wordtop_fname") ("topk", po::value(&topk)-> default_value(topk), "number of top k to show"); po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); po::notify(vm); if (vm.count("help")) { std::cout << desc << "\n"; return EXIT_FAILURE; } if (dictionary_fname.length() == 0 || counts_fname.length() == 0) { std::cout << "Both counts and dictionary must be specified" << std::endl; std::cout << desc << "\n"; return EXIT_FAILURE; } std::cout << "Loading the corpus." 
<< std::endl; corpus_type corpus(dictionary_fname, counts_fname); std::cout << "Number of words: " << corpus.nwords << std::endl << "Number of docs: " << corpus.ndocs << std::endl << "Number of tokens: " << corpus.ntokens << std::endl << "Ntopics: " << ntopics << std::endl << "Alpha: " << alpha << std::endl << "Beta: " << beta << std::endl; std::cout << "Seeding Generator: " << std::endl; graphlab::random::nondet_seed(); std::cout << "Shuffling corpus: " << std::endl; corpus.shuffle_tokens(); std::cout << "Constructing Gibbs Sampler: " << std::endl; collapsed_gibbs gibbs(corpus, ntopics, alpha, beta); std::ofstream llik_fout(llik_fname.c_str()); llik_fout.precision(16); std::cout << "Starting Burnin" << std::endl; for(size_t i = 0; i < nburnin; ++i) { std::cout << "Burnin iteration: " << i << std::endl; gibbs.iterate(); std::cout << "Computing top " << topk << " of each topic" << std::endl; display_top(corpus, gibbs.n_wt, topk); std::cout << "Number of changes: " << gibbs.nchanges << std::endl << "Prop. Changes: " << double(gibbs.nchanges)/ corpus.ntokens << std::endl; double llik = log_likelihood(gibbs.alpha, gibbs.beta, gibbs.n_td, gibbs.n_wt); std::cout << "Log-likelihood: " // std::setprecision(8) << << llik << std::endl; llik_fout << llik << '\t' << gibbs.nchanges << std::endl; } std::cout << "Finished burnin. Preparing final sample set." << std::endl; mat_type n_td(ntopics, corpus.ndocs, 0); mat_type n_wt(corpus.nwords, ntopics, 0); mat_type n_t(ntopics, 1, 0); for(size_t i = 0; i < nsamples; ++i) { std::cout << "Sampling iteration: " << i << std::endl; gibbs.iterate(); std::cout << "Number of changes: " << gibbs.nchanges << std::endl << "Prop. 
Changes: " << double(gibbs.nchanges)/ corpus.ntokens << std::endl; std::cout << "Accumulating sample" << std::endl; n_td += gibbs.n_td; n_wt += gibbs.n_wt; n_t += gibbs.n_t; std::cout << "Computing top " << topk << " of each topic" << std::endl; display_top(corpus, n_wt, topk); std::cout << "Number of changes: " << gibbs.nchanges << std::endl << "Prop. Changes: " << double(gibbs.nchanges)/ corpus.ntokens << std::endl; double llik = log_likelihood(gibbs.alpha, gibbs.beta, gibbs.n_td, gibbs.n_wt); std::cout << "Log-likelihood: " // std::setprecision(8) << << llik << std::endl; llik_fout << llik << '\t' << gibbs.nchanges << std::endl; } llik_fout.close(); std::cout << "Saving doctop: " << doctop_fname << std::endl; std::ofstream doctop_fout(doctop_fname.c_str()); for(size_t d = 0; d < corpus.ndocs; ++d) { double normalizer = ntopics * alpha; for(size_t t = 0; t < ntopics; ++t) normalizer += double(n_td(t,d)) / double(nsamples); for(size_t t = 0; t < ntopics; ++t) { const double value = (double(n_td(t,d))/double(nsamples) + alpha) / normalizer; doctop_fout << value << ((t+1 < ntopics)? '\t' : '\n'); } } doctop_fout.close(); std::cout << "Saving wordtop: " << wordtop_fname << std::endl; return EXIT_SUCCESS; } ================================================ FILE: toolkits/topic_modeling/topic_modeling.dox ================================================ /** \page topic_modeling Topic Modeling \brief The topic modeling toolkit contains a collection of applications targeted at clustering documents and extracting topical representations. The resulting topical representation can be used as a feature space in information retrieval tasks and to group topically related words and documents. Currently the text modeling toolkit implements a fast asynchronous collapsed Gibbs sampler for the widely used Latent Dirichlet Allocation (LDA) model. 
In the near future we plan to add a Collapsed Variational Bayesian inference algorithm for the LDA model as well as some more general topic models. \section lda_model The LDA Model The LDA model associates a topic id with each token (word) in each document in the input corpus. Conceptually, topic ids correspond to semantic groups like "foods", "colors", and "politics"; however, the association between the id 1, 2, ..., N and the particular topic meaning "foods", "colors", ... is not known in advance and can be resolved by running the approximate inference algorithm. In addition the LDA model assigns a distribution over topics to each document and a distribution over terms to each topic. The topic id for each token is drawn from the topic distribution for each document. The actual word is then drawn from the term distribution for that topic. At a high-level the LDA model encodes the following intuitive assumptions: \li Words in the same document are topically related. \li Documents that share common terms are topically related. Solving for the latent topic assignments of each token as well as the topic distribution for each document and the term distribution for each topic is a challenging (NP-Hard) task. Fortunately there are several approximate inference algorithms that typically can resolve coherent posterior estimates for the LDA model. \subsection collapsed_gibbs The Collapsed Gibbs Sampler The topic modeling toolkit currently implements an asynchronous variant of the Collapsed Gibbs Sampler described by Griffiths and Steyvers in their landmark paper Finding Scientific Topics. The collapsed Gibbs sampler is a Markov Chain Monte Carlo (MCMC) algorithm which generates a sequence of topic assignments for each token that in the limit converges to a sequence of samples drawn from the posterior distribution.
In practice the algorithm is run for a sufficiently long time to allow the topics to "converge" (sometimes referred to as burn-in) and then the last few samples are used to estimate the posterior distribution over topics for each document and the posterior distribution over words for each topic. \subsection parallel_collapsed_gibbs The Parallel Collapsed Gibbs Sampler The parallelization of the Collapsed Gibbs Sampler is achieved by drawing new assignments for multiple tokens simultaneously using a method that is similar to that described by Ahmed et al. (Paper). Unfortunately, the collapsed LDA model used to accelerate mixing of the Gibbs sampler also eliminates any conditional independence structure needed to obtain a parallel ergodic sampler as described by Gonzalez et al. (Paper). However, by mapping the collapsed Gibbs sampler into the GraphLab abstraction we obtain a statistically more efficient algorithm. To implement the collapsed Gibbs sampler in GraphLab we construct a bipartite graph connecting each document with terms that occur in that document. Each edge contains the token count and latent topic assignments for that token. The GraphLab update function maintains the term and document counts during the gather and apply phases and then samples new values for the tokens on the scatter phase. We exploit local atomic integer operations and the GraphLab caching model to immediately propagate changes. The asynchronous consistency model ensures that only one token per document term pair is sampled at a time improving upon the original formulation of the asynchronous Gibbs sampler described by Ahmed et al. (Paper) or the sampler described by Asuncion et al. (Paper). \section cgs_lda_usage Usage ============== The collapsed Gibbs sampler application (\ref cgs_lda.cpp) takes as input a text corpus represented as one or more token files.
Each token file consists of lines in the form: \verbatim doc_id word_id count \endverbatim for example a file containing: \verbatim 0 0 2 0 4 1 1 2 3 \endverbatim implies that the word with id 0 occurs twice in document 0, the word with id 4 occurs once in document 0, and the word with id 2 occurs three times in document 1. On termination the system outputs for each term the number of occurrences of that term that have been assigned to each topic and for each document the number of tokens assigned to each topic. \subsection cgs_lda_example Example To demonstrate how the CGS LDA application works we have obtained a copy of the Daily Kos bag-of-words data from the UCI Repository and reformatted it for the cgs_lda application. You can download the reformatted data from here. Once extracted the folder contains: \verbatim > ls -lR daily_kos total 120 -rw-r--r-- 1 jegonzal staff 904 Jul 1 22:37 README -rw-r--r--@ 1 jegonzal staff 55467 Jul 1 22:21 dictionary.txt drwxr-xr-x 3 jegonzal staff 102 Jul 1 22:21 tokens ./tokens: total 7960 -rw-r--r-- 1 jegonzal staff 4074516 Jul 1 22:21 doc_word_count.tsv \endverbatim To run the CGS_LDA GraphLab application on a single machine we simply run: \verbatim > ./cgs_lda --corpus ./daily_kos/tokens --dictionary ./daily_kos/dictionary.txt \endverbatim This will run indefinitely, displaying the top words in each topic every 10 seconds. To help visualize the output, open the webpage \verbatim graphlab/toolkits/topic_modeling/http/index.html \endverbatim which renders a word cloud that connects directly to the cgs_lda application's internal web-server running on localhost port 8090. In most cases we will also be interested in collecting the final assignments.
This can be done by running: \verbatim > ./cgs_lda --corpus ./daily_kos/tokens --dictionary ./daily_kos/dictionary.txt \ --word_dir word_counts --doc_dir doc_counts --burnin=60 \endverbatim This will run the cgs_lda sampler for roughly 60 seconds and then save the counts of tokens in each topic for the words and documents in the files \c word_counts_x_of_x and \c doc_counts_x_of_x . If instead you would like to save the counts to separate folders you can prepend the folder path. By default GraphLab runs with two threads. However we can increase the parallelism on a single machine by increasing the number of threads: \verbatim > ./cgs_lda --corpus ./daily_kos/tokens --dictionary ./daily_kos/dictionary.txt \ --ncpus=8 \endverbatim The cgs_lda application can run in the distributed setting as well simply by using MPI to launch it: \verbatim > mpiexec --hostfile machine_list.txt -n 16 ./cgs_lda \ --corpus ./daily_kos/tokens --dictionary ./daily_kos/dictionary.txt \ --ncpus=8 \endverbatim This will run 16 instances each consuming 8 cores on the machines in the \c machine_list.txt file. Each of these instances will automatically communicate, splitting the work as well as the memory requirements. It is really that easy! Since we are running in the distributed setting it is convenient to be able to read and write to a distributed filesystem. We have built HDFS support into the GraphLab abstraction therefore we can simply change the arguments to be: \verbatim > mpiexec --hostfile machine_list.txt -n 16 ./cgs_lda \ --corpus hdfs://bros.ml.cmu.edu/daily_kos/tokens \ --dictionary hdfs://bros.ml.cmu.edu/daily_kos/dictionary.txt \ --ncpus=8 \endverbatim \section convergence Tracking progress We have added a Likelihood printout every interval seconds (defined using the --interval command line argument) which captures the convergence progress of the LDA algorithm. The Likelihood should go up every interval.
\section cgs_lda_options Command Line Options There are a wide range of options available when calling the cgs_lda program: \li --corpus _Required_ The path to the token file(s). This can be a folder, a file, or a folder plus a file prefix. In addition this can be a path in hdfs (e.g., hdfs://namenode/tokens/). \li --help Display the help screen listing the available options \li --dictionary (Optional) The path to the dictionary file. This can be a local path or a path on hdfs (e.g., hdfs://namenode/dictionary.txt). If no dictionary is provided then incremental top-k word lists will not be generated and the word list webpage will not be available. \li --engine (Optional, Default: asynchronous) The engine type to use when executing the vertex program. Accepted values are: - synchronous: All tokens are sampled simultaneously leading to faster computation but slower convergence. - asynchronous: Tokens are sampled as resources become available and token conditionals are updated immediately after sampling. This reduces parallelism and sampling speed but increases the rate of convergence. \li --ntopics (Optional, Default: 50) The number of topics to use in the LDA model. Using fewer topics will increase speed and result in more "mixed" concepts. Using more topics will slow down sampling but lead to more specific topics. \li --alpha (Optional, Default 1) The hyper-parameter for the topic distribution for each document. Larger values imply documents have a more uniform mix of topics. Smaller values (less than one) imply that documents are more focused on a small subset of topics. Note that smaller values also slow down convergence of the sampler. \li --beta (Optional, Default 0.1) The hyper-parameter for the word distribution in each topic. Larger values imply that topics contain all words equally and smaller values (less than one) imply that topics are focused on a small set of words. Note that smaller values also slow down convergence of the sampler. 
\li --topk (Optional, Default 5) The number of words to show in each topic when incrementally listing the top words in each topic. This also affects the word cloud viewer. \li --interval (Optional, Default 10) The time in seconds between when the incremental listing of top words is presented. \li --max_count (Optional, Default 100) The maximum number of occurrences of a token in a document. If a token occurs more than \c max_count then it is reported as occurring \c max_count times. This ensures that overly frequent words do not dominate documents. \li --loadjson (Optional, Default false) This flag is used to turn on the experimental JSON graph loader that reads graphs constructed using external graph builder libraries. If set to true then the \c --corpus argument must point to the JSON files. \li --burnin (Optional, Default -1) The time in seconds to run the sampler before the sample is saved to file (and the sampler terminates). If the value is less than zero then the sampler will run indefinitely. \li --doc_dir (Optional, Default empty) The location (path/prefix) to save the final topic counts for each document after burnin. This can also be an hdfs path (e.g., hdfs://namenode/folder/prefix). If this is not set then the per document topic counts are not saved. \li --word_dir (Optional, Default empty) The location (path/prefix) to save the final topic counts for each word after burnin. This can also be an hdfs path (e.g., hdfs://namenode/folder/prefix). If this is not set then the per word topic counts are not saved. \li --ncpus (Optional, Default 2) The number of local computation threads to use on each machine. This should typically match the number of physical cores. \li --scheduler (Optional, Default sweep) The scheduler to use when running with the asynchronous engine. The default is typically sufficient. \li --engine_opts (Optional, Default empty) Any additional engine options. See --engine_help for a list of options. 
\li --graph_opts (Optional, Default empty) Any additional graph options. See --graph_help for a list of options. \li --scheduler_opts (Optional, Default empty) Any additional scheduler options. See --scheduler_help for a list of options. */